diff --git a/.gitignore b/.gitignore index 0c2e975..ab2ae73 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,11 @@ .tox *.pyc .eggs/ +*.pot +*.mo +*.po +.pytest_cache *.egg-info/ _build +.benchmarks +.hypothesis diff --git a/fluent.runtime/CHANGELOG.rst b/fluent.runtime/CHANGELOG.rst index 76eeb18..9b6208a 100644 --- a/fluent.runtime/CHANGELOG.rst +++ b/fluent.runtime/CHANGELOG.rst @@ -8,6 +8,7 @@ fluent.runtime development version (unreleased) terms. * Refined error handling regarding function calls to be more tolerant of errors in FTL files, while silencing developer errors less. +* Added ``CompilingFluentBundle`` implementation. fluent.runtime 0.1 (January 21, 2019) ------------------------------------- diff --git a/fluent.runtime/docs/implementations.rst b/fluent.runtime/docs/implementations.rst new file mode 100644 index 0000000..a5d09e5 --- /dev/null +++ b/fluent.runtime/docs/implementations.rst @@ -0,0 +1,115 @@ +FluentBundle Implementations +============================ + +python-fluent comes with two implementations of ``FluentBundle``. The default is +``fluent.runtime.InterpretingFluentBundle``, which is what you get under the +alias ``fluent.runtime.FluentBundle``. It implements an interpreter for the FTL +Abstract Syntax Tree. + +The alternative is ``fluent.runtime.CompilingFluentBundle``, which works by +compiling a set of FTL messages to a set of Python functions using Python `ast +<https://docs.python.org/3/library/ast.html>`_. This results in very good +performance (see below for more info). + +While the two implementations have the same API, and return the same values +under most situations, there are some differences, as follows: + +* ``InterpretingFluentBundle`` has some protection against malicious FTL input + which could attempt things like a `billion laughs attack + <https://en.wikipedia.org/wiki/Billion_laughs_attack>`_ to consume a large + amount of memory or CPU time. For the sake of performance, + ``CompilingFluentBundle`` does not have these protections.
+ + It should be noted that both implementations are able to detect and stop + infinite recursion errors (``CompilingFluentBundle`` does this at compile + time), which is important to stop infinite loops and memory exhaustion which + could otherwise occur due to accidental cyclic references in messages. + +* While the error handling strategy for both implementations is the same, when + errors occur (e.g. a missing value in the arguments dictionary, or a cyclic + reference, or a string is passed to ``NUMBER()`` builtin), the exact errors + returned by ``format`` may be different between the two implementations. + + Also, when an error occurs, in some cases (such as a cyclic reference), the + error string embedded into the returned formatted message may be different. + For cases where there is no error, the output is identical (or should be). + + Neither implementation guarantees that the exact errors returned will be the + same between different versions of ``fluent.runtime``. + +Performance +----------- + +Due to the strategy of compiling to Python, ``CompilingFluentBundle`` has very +good performance, especially for the simple common cases. The +``tools/benchmark/gettext_comparisons.py`` script includes some benchmarks that +compare speed to GNU gettext as a reference. Below is a rough summary: + +For the simple but very common case of a message defining a static string, +``CompilingFluentBundle.format`` is very close to GNU gettext, or much faster, +depending on whether you are using Python 2 or 3, and your Python implementation +(e.g. CPython or PyPy). (The worst case we found was 5% faster than gettext on +CPython 2.7, and the best case was about 3.5 times faster for PyPy2 5.1.2). For +cases of substituting a single string into a message, +``CompilingFluentBundle.format`` is between 30% slower and 70% faster than an +equivalent implementation using GNU gettext and Python ``%`` interpolation.
+ +For messages where plural rules are involved, currently ``CompilingFluentBundle`` +can be significantly slower than using GNU gettext, partly because it uses +plural rules from CLDR that can be much more complex (and correct) than the ones +that gettext normally does. Further work could be done to optimize some of these +cases though. + +For more complex operations (for example, using locale-aware date and number +formatting), formatting messages can take a lot longer. Comparisons to GNU +gettext fall down at this point, because it doesn't include a lot of this +functionality. However, usually these types of messages make up a small fraction +of the number of internationalized strings in an application. + +``InterpretingFluentBundle`` is, as you would expect, much slower than +``CompilingFluentBundle``, often by a factor of 10. In cases where there are a +large number of messages, ``CompilingFluentBundle`` will be a lot slower to +format the first message because it first compiles all the messages, whereas +``InterpretingFluentBundle`` does not have this compilation step, and tries to +reduce any up-front work to a minimum (sometimes at the cost of runtime +performance). + + +Security +-------- + +You should not pass un-trusted FTL code to ``FluentBundle.add_messages``. This +is because carefully constructed messages could potentially cause large resource +usage (CPU time and memory). The ``InterpretingFluentBundle`` implementation +does have some protection against these attacks, although it may not be +foolproof, while ``CompilingFluentBundle`` does not have any protection against +these attacks, either at compile time or run time. + +``CompilingFluentBundle`` works by compiling FTL messages to Python `ast +<https://docs.python.org/3/library/ast.html>`_, which is passed to `compile +<https://docs.python.org/3/library/functions.html#compile>`_ and then `exec +<https://docs.python.org/3/library/functions.html#exec>`_. The use of ``exec`` +like this is an established technique for high performance Python code, used in +template engines like Mako, Jinja2 and Genshi.
+ +However, there can understandably be some concerns around the use of ``exec`` +which can open up remote execution vulnerabilities. If this is of paramount +concern to you, you should consider using ``InterpretingFluentBundle`` instead +(which is the default). + +To reduce the possibility of our use of ``exec`` harbouring security issues, the +following things are in place: + +1. We generate `ast <https://docs.python.org/3/library/ast.html>`_ objects and + not strings. This greatly reduces the security problems, since there is no + possibility of a vulnerability due to incorrect string interpolation. + +2. We use ``exec`` only on AST derived from FTL files, never on "end user input" + (such as the arguments passed into ``FluentBundle.format``). This reduces the + attack vector to only the situation where the source of your FTL files is + potentially malicious or compromised. + +3. We employ defence-in-depth techniques in our code generation and compiler + implementation to reduce the possibility of cleverly crafted FTL code + producing security holes, and ensure these techniques have full test + coverage. diff --git a/fluent.runtime/docs/usage.rst b/fluent.runtime/docs/usage.rst index c54b49a..f3c272c 100644 --- a/fluent.runtime/docs/usage.rst +++ b/fluent.runtime/docs/usage.rst @@ -93,6 +93,19 @@ module or the start of your repl session: from __future__ import unicode_literals +CompilingFluentBundle +~~~~~~~~~~~~~~~~~~~~~ + +In addition to the default ``FluentBundle`` implementation, there is also a high +performance implementation that compiles to Python AST. You can use it just the same: + +.. code-block:: python + + from fluent.runtime import CompilingFluentBundle as FluentBundle + +Be sure to check the notes on :doc:`implementations`, especially the security +section. + Numbers ~~~~~~~ @@ -225,5 +238,6 @@ Help with the above would be welcome!
Other features and further information -------------------------------------- +* :doc:`implementations` * :doc:`functions` * :doc:`errors` diff --git a/fluent.runtime/fluent/runtime/__init__.py b/fluent.runtime/fluent/runtime/__init__.py index d115946..346584f 100644 --- a/fluent.runtime/fluent/runtime/__init__.py +++ b/fluent.runtime/fluent/runtime/__init__.py @@ -1,19 +1,23 @@ from __future__ import absolute_import, unicode_literals +from collections import OrderedDict + import babel import babel.numbers import babel.plural from fluent.syntax import FluentParser -from fluent.syntax.ast import Message, Term +from fluent.syntax.ast import Junk, Message, Term from .builtins import BUILTINS +from .compiler import compile_messages +from .errors import FluentDuplicateMessageId, FluentJunkFound from .prepare import Compiler -from .resolver import ResolverEnvironment, CurrentEnvironment +from .resolver import CurrentEnvironment, ResolverEnvironment from .utils import ATTRIBUTE_SEPARATOR, TERM_SIGIL, ast_to_id, native_to_fluent -class FluentBundle(object): +class FluentBundleBase(object): """ Message contexts are single-language stores of translations. 
They are responsible for parsing translation resources in the Fluent syntax and can @@ -33,27 +37,60 @@ def __init__(self, locales, functions=None, use_isolating=True): _functions.update(functions) self._functions = _functions self.use_isolating = use_isolating - self._messages_and_terms = {} - self._compiled = {} - self._compiler = Compiler() + self._messages_and_terms = OrderedDict() + self._parsing_issues = [] self._babel_locale = self._get_babel_locale() self._plural_form = babel.plural.to_python(self._babel_locale.plural_form) def add_messages(self, source): parser = FluentParser() resource = parser.parse(source) - # TODO - warn/error about duplicates for item in resource.body: if isinstance(item, (Message, Term)): full_id = ast_to_id(item) - if full_id not in self._messages_and_terms: + if full_id in self._messages_and_terms: + self._parsing_issues.append((full_id, FluentDuplicateMessageId( + "Additional definition for '{0}' discarded.".format(full_id)))) + else: self._messages_and_terms[full_id] = item + elif isinstance(item, Junk): + self._parsing_issues.append( + (None, FluentJunkFound("Junk found: " + + '; '.join(a.message for a in item.annotations), + item.annotations))) def has_message(self, message_id): if message_id.startswith(TERM_SIGIL) or ATTRIBUTE_SEPARATOR in message_id: return False return message_id in self._messages_and_terms + def _get_babel_locale(self): + for l in self.locales: + try: + return babel.Locale.parse(l.replace('-', '_')) + except babel.UnknownLocaleError: + continue + # TODO - log error + return babel.Locale.default() + + def format(self, message_id, args=None): + raise NotImplementedError() + + def check_messages(self): + """ + Check messages for errors and return as a list of two tuples: + (message ID or None, exception object) + """ + raise NotImplementedError() + + +class InterpretingFluentBundle(FluentBundleBase): + + def __init__(self, locales, functions=None, use_isolating=True): + super(InterpretingFluentBundle, 
self).__init__(locales, functions=functions, use_isolating=use_isolating) + self._compiled = {} + self._compiler = Compiler() + def lookup(self, full_id): if full_id not in self._compiled: entry_id = full_id.split(ATTRIBUTE_SEPARATOR, 1)[0] @@ -83,11 +120,55 @@ def format(self, message_id, args=None): errors=errors) return [resolve(env), errors] - def _get_babel_locale(self): - for l in self.locales: - try: - return babel.Locale.parse(l.replace('-', '_')) - except babel.UnknownLocaleError: - continue - # TODO - log error - return babel.Locale.default() + def check_messages(self): + return self._parsing_issues[:] + + +class CompilingFluentBundle(FluentBundleBase): + def __init__(self, *args, **kwargs): + super(CompilingFluentBundle, self).__init__(*args, **kwargs) + self._mark_dirty() + + def _mark_dirty(self): + self._is_dirty = True + # Clear out old compilation errors, they might not apply if we + # re-compile: + self._compilation_errors = [] + self.format = self._compile_and_format + + def _mark_clean(self): + self._is_dirty = False + self.format = self._format + + def add_messages(self, source): + super(CompilingFluentBundle, self).add_messages(source) + self._mark_dirty() + + def _compile(self): + self._compiled_messages, self._compilation_errors = compile_messages( + self._messages_and_terms, + self._babel_locale, + use_isolating=self.use_isolating, + functions=self._functions) + self._mark_clean() + + # 'format' is the hot path for many scenarios, so we try to optimize it. To + # avoid having to check '_is_dirty' inside 'format', we switch 'format' from + # '_compile_and_format' to '_format' when compilation is done. This gives us + # about 10% improvement for the simplest (but most common) case of an + # entirely static string. 
+ def _compile_and_format(self, message_id, args=None): + self._compile() + return self._format(message_id, args) + + def _format(self, message_id, args=None): + errors = [] + return self._compiled_messages[message_id](args, errors), errors + + def check_messages(self): + if self._is_dirty: + self._compile() + return self._parsing_issues + self._compilation_errors + + +FluentBundle = InterpretingFluentBundle diff --git a/fluent.runtime/fluent/runtime/ast_compat.py b/fluent.runtime/fluent/runtime/ast_compat.py new file mode 100644 index 0000000..3691dec --- /dev/null +++ b/fluent.runtime/fluent/runtime/ast_compat.py @@ -0,0 +1,92 @@ +""" +Compatibility module for generating Python AST. + +The interface mocks the stdlib 'ast' module of the most recent Python version we +support, so that the codegen module can be written as if it targets that +version. For older versions we provide shims that adapt to the older AST as and +when necessary. + +""" +import ast +import sys + +PY2 = sys.version_info < (3, 0) + +# We include only the things codegen needs. 
+Assign = ast.Assign +BoolOp = ast.BoolOp +Compare = ast.Compare +Dict = ast.Dict +Eq = ast.Eq +ExceptHandler = ast.ExceptHandler +Expr = ast.Expr +If = ast.If +Index = ast.Index +List = ast.List +Load = ast.Load +Module = ast.Module +Num = ast.Num +Or = ast.Or +Pass = ast.Pass +Return = ast.Return +Store = ast.Store +Str = ast.Str +Subscript = ast.Subscript +Tuple = ast.Tuple +arguments = ast.arguments + +if PY2: + # Python 2 needs identifiers to be bytestrings, not unicode strings: + def change_attrs_to_str(ast_class, attr_list): + def wrapper(**kwargs): + for attr in attr_list: + if attr in kwargs and isinstance(kwargs[attr], unicode): + kwargs[attr] = str(kwargs[attr]) + return ast_class(**kwargs) + return wrapper + + Attribute = change_attrs_to_str(ast.Attribute, ['attr']) + + def Call(func=None, args=[], keywords=[], **other_args): + # For **expr syntax: + # - in Python 2 Ast, we have ast.Call(kwargs=expr) + # - in Python 3 Ast, we have ast.Call(keywords=keywords) where + # `keywords` contains a special item: `keyword(arg=None, value=expr)`. + # Here we convert Python 3 convention back to Python 2 Ast. 
+ kwargs = None + python_2_keywords = [] + for k in keywords: + if k.arg is None: + kwargs = k.value + else: + python_2_keywords.append(k) + return ast.Call(func=func, + args=args, + keywords=python_2_keywords, + kwargs=kwargs, + **other_args) + + FunctionDef = change_attrs_to_str(ast.FunctionDef, ['name']) + Name = change_attrs_to_str(ast.Name, ['id']) + + def NameConstant(value=None, **kwargs): + if value is None: + return Name(id='None', ctx=ast.Load(), **kwargs) + else: + raise AssertionError("Don't know how to translate NameConstant(value={!r})".format(value)) + + Try = ast.TryExcept + + def arg(arg=None, annotation=None, **kwargs): + return Name(id=str(arg), ctx=ast.Param(), **kwargs) + + keyword = change_attrs_to_str(ast.keyword, ['arg']) +else: + Attribute = ast.Attribute + Call = ast.Call + FunctionDef = ast.FunctionDef + Name = ast.Name + NameConstant = ast.NameConstant + Try = ast.Try + arg = ast.arg + keyword = ast.keyword diff --git a/fluent.runtime/fluent/runtime/codegen.py b/fluent.runtime/fluent/runtime/codegen.py new file mode 100644 index 0000000..db811f1 --- /dev/null +++ b/fluent.runtime/fluent/runtime/codegen.py @@ -0,0 +1,799 @@ +""" +Utilities for doing Python code generation +""" +from __future__ import absolute_import, unicode_literals + +import keyword +import re +import sys + +from six import text_type + +from . import ast_compat as ast +from .utils import allowable_keyword_arg_name, allowable_name + +# This module provides simple utilities for building up Python source code. It +# implements only what is really needed by compiler.py, with a number of aims +# and constraints: +# +# 1. Performance. +# +# The resulting Python code should do as little as possible, especially for +# simple cases (which are by far the most common for .ftl files) +# +# 2. Correctness (obviously) +# +# In particular, we should try to make it hard to generate code that is +# syntactically correct and therefore compiles but doesn't work. 
We try to +# make it hard to generate accidental name clashes, or use variables that are +# not defined. +# +# Correctness also has a security implication, since the result of this code +# is 'exec'ed. To that end: +# * We build up AST, rather than strings. This eliminates many +# potential bugs caused by wrong escaping/interpolation. +# * the `as_ast()` methods are paranoid about input, and do many asserts. +# We do this even though other layers will usually have checked the +# input, to allow us to reason locally when checking these methods. These +# asserts must also have 100% code coverage. +# +# 3. Simplicity +# +# The resulting Python code should be easy to read and understand. +# +# 4. Predictability +# +# Since we want to test the resulting source code, we have made some design +# decisions that aim to ensure things like function argument names are +# consistent and so can be predicted easily. + + +PROPERTY_TYPE = 'PROPERTY_TYPE' +PROPERTY_RETURN_TYPE = 'PROPERTY_RETURN_TYPE' +UNKNOWN_TYPE = object +SENSITIVE_FUNCTIONS = [ + # builtin functions that we should never be calling from our code + # generation. This is a defense-in-depth mechanism to stop our code + # generation becoming a code execution vulnerability; we also have + # higher level code that ensures we are not generating calls + # to arbitrary Python functions. + + # This is not a comprehensive list of functions we are not using, but + # functions we definitely don't need and are most likely to be used to + # execute remote code or to get around safety mechanisms. + '__import__', + '__build_class__', + 'apply', + 'compile', + 'eval', + 'exec', + 'execfile', + 'exit', + 'file', + 'globals', + 'locals', + 'open', + 'object', + 'reload', + 'type', +] + + +class PythonAst(object): + """ + Base class representing a simplified Python AST (not the real one). + Generates real `ast.*` nodes via `as_ast()` method.
+ """ + def simplify(self, changes, simplifier): + """ + Simplify the statement/expression, returning either a modified + self, or a new object. + + This method should call .simplify(changes) on any contained subexpressions + or statements. + + If changes were made, a True value must be appended to the passed in changes list. + + It should also run the callable simplifier on any returned values (this + is an externally passed in function that may do additional higher level + simplifications) + + """ + return self + + def as_ast(self): + raise NotImplementedError("{!r}.as_ast()".format(self.__class__)) + + +class PythonAstList(object): + """ + Alternative base class to PythonAst when we have code that wants to return a + list of AST objects. + """ + def as_ast_list(self): + raise NotImplementedError("{!r}.as_ast_list()".format(self.__class__)) + + +# `compiler` needs these attributes on AST nodes. +# We don't have anything sensible we can put here so we put arbitrary values. +DEFAULT_AST_ARGS = dict(lineno=1, col_offset=1) + + +class Scope(object): + def __init__(self, parent_scope=None): + self.parent_scope = parent_scope + self.names = set() + self._function_arg_reserved_names = set() + self._properties = {} + self._assignments = {} + + def names_in_use(self): + names = self.names + if self.parent_scope is not None: + names = names | self.parent_scope.names_in_use() + return names + + def function_arg_reserved_names(self): + names = self._function_arg_reserved_names + if self.parent_scope is not None: + names = names | self.parent_scope.function_arg_reserved_names() + return names + + def all_reserved_names(self): + return self.names_in_use() | self.function_arg_reserved_names() + + def reserve_name(self, requested, function_arg=False, is_builtin=False, properties=None): + """ + Reserve a name as being in use in a scope. + + Pass function_arg=True if this is a function argument. + 'properties' is an optional dict of additional properties + (e.g. 
the type associated with a name) + """ + def _add(final): + self.names.add(final) + self._properties[final] = properties or {} + return final + + if function_arg: + if requested in self.function_arg_reserved_names(): + assert requested not in self.names_in_use() + return _add(requested) + if requested in self.all_reserved_names(): + raise AssertionError("Cannot use '{0}' as argument name as it is already in use" + .format(requested)) + + cleaned = cleanup_name(requested) + + attempt = cleaned + count = 2 # instance without suffix is regarded as 1 + # To avoid shadowing of global names in local scope, we + # take into account parent scope when assigning names. + + used = self.all_reserved_names() + # We need to also protect against using keywords ('class', 'def' etc.) + # However, some builtins are also keywords (e.g. 'None'), and so + # if a builtin is being reserved, don't check against the keyword list + if not is_builtin: + used = used | set(keyword.kwlist) + while attempt in used: + attempt = cleaned + str(count) + count += 1 + return _add(attempt) + + def reserve_function_arg_name(self, name): + """ + Reserve a name for *later* use as a function argument. This does not result + in that name being considered 'in use' in the current scope, but will + avoid the name being assigned for any use other than as a function argument. + """ + # To keep things simple, and the generated code predictable, we reserve + # names for all function arguments in a separate scope, and insist on + # the exact names + if name in self.all_reserved_names(): + raise AssertionError("Can't reserve '{0}' as function arg name as it is already reserved" + .format(name)) + self._function_arg_reserved_names.add(name) + + def get_name_properties(self, name): + """ + Gets a dictionary of properties for the name. 
+ Raises exception if the name is not reserved in this scope or parent + """ + if name in self._properties: + return self._properties[name] + return self.parent_scope.get_name_properties(name) + + def set_name_properties(self, name, props): + """ + Sets a dictionary of properties for the name. + Raises exception if the name is not reserved in this scope or parent. + """ + scope = self + while True: + if name in scope._properties: + scope._properties[name].update(props) + break + else: + scope = scope.parent_scope + + def find_names_by_property(self, prop_name, prop_val): + """ + Retrieve all names that match the supplied property name and value + """ + return [name + for name, props in self._properties.items() + for k, v in props.items() + if k == prop_name and v == prop_val] + + def has_assignment(self, name): + return name in self._assignments + + def register_assignment(self, name): + self._assignments[name] = None + + def variable(self, name): + # Convenience utility for returning a VariableReference + return VariableReference(name, self) + + +_IDENTIFIER_SANITIZER_RE = re.compile('[^a-zA-Z0-9_]') +_IDENTIFIER_START_RE = re.compile('^[a-zA-Z_]') + + +def cleanup_name(name): + # See https://docs.python.org/2/reference/lexical_analysis.html#grammar-token-identifier + name = _IDENTIFIER_SANITIZER_RE.sub('', name) + if not _IDENTIFIER_START_RE.match(name): + name = "n" + name + return name + + +class Statement(object): + pass + + +class _Assignment(Statement, PythonAst): + def __init__(self, name, value): + self.name = name + self.value = value + + def as_ast(self): + if not allowable_name(self.name): + raise AssertionError("Expected {0} to be a valid Python identifier".format(self.name)) + return ast.Assign( + targets=[ast.Name(id=self.name, + ctx=ast.Store(), + **DEFAULT_AST_ARGS)], + value=self.value.as_ast(), + **DEFAULT_AST_ARGS) + + def simplify(self, changes, simplifier): + self.value = self.value.simplify(changes, simplifier) + return simplifier(self, 
changes) + + +class Block(PythonAstList): + def __init__(self, scope, parent_block=None): + self.scope = scope + self.statements = [] + self.parent_block = parent_block + + def as_ast_list(self, allow_empty=True): + retval = [] + for s in self.statements: + if hasattr(s, 'as_ast_list'): + retval.extend(s.as_ast_list(allow_empty=True)) + else: + if isinstance(s, Statement): + retval.append(s.as_ast()) + else: + # Things like bare function/method calls need to be wrapped + # in `Expr` to match the way Python parses. + retval.append(ast.Expr(s.as_ast(), **DEFAULT_AST_ARGS)) + + if len(retval) == 0 and not allow_empty: + return [ast.Pass(**DEFAULT_AST_ARGS)] + return retval + + def add_statement(self, statement): + self.statements.append(statement) + if isinstance(statement, Block): + if statement.parent_block is None: + statement.parent_block = self + else: + if statement.parent_block != self: + raise AssertionError("Block {0} is already child of {1}, can't reassign to {2}" + .format(statement, statement.parent_block, self)) + + # Safe alternatives to Block.statements being manipulated directly: + def add_assignment(self, name, value, allow_multiple=False): + """ + Adds an assignment of the form: + + x = value + """ + if name not in self.scope.names_in_use(): + raise AssertionError("Cannot assign to unreserved name '{0}'".format(name)) + + if self.scope.has_assignment(name): + if not allow_multiple: + raise AssertionError("Have already assigned to '{0}' in this scope".format(name)) + else: + self.scope.register_assignment(name) + + self.add_statement(_Assignment(name, value)) + + def add_function(self, func_name, func): + assert func.func_name == func_name + self.add_statement(func) + + def add_return(self, value): + self.add_statement(Return(value)) + + def has_assignment_for_name(self, name): + for s in self.statements: + if isinstance(s, _Assignment) and s.name == name: + return True + elif hasattr(s, 'has_assignment_for_name'): + if s.has_assignment_for_name(name):
+ return True + if self.parent_block is not None: + return self.parent_block.has_assignment_for_name(name) + return False + + def simplify(self, changes, simplifier): + self.statements = [s.simplify(changes, simplifier) for s in self.statements] + return simplifier(self, changes) + + +class Module(Block, PythonAst): + def __init__(self): + scope = Scope(parent_scope=None) + Block.__init__(self, scope) + + def as_ast(self): + return ast.Module(body=self.as_ast_list(), **DEFAULT_AST_ARGS) + + +class Function(Scope, Statement, PythonAst): + def __init__(self, name, args=None, parent_scope=None): + super(Function, self).__init__(parent_scope=parent_scope) + self.body = Block(self) + self.func_name = name + if args is None: + args = () + for arg in args: + if (arg in self.names_in_use()): + raise AssertionError("Can't use '{0}' as function argument name because it shadows other names" + .format(arg)) + self.reserve_name(arg, function_arg=True) + self.args = args + + def as_ast(self): + if not allowable_name(self.func_name): + raise AssertionError("Expected '{0}' to be a valid Python identifier".format(self.func_name)) + for arg in self.args: + if not allowable_name(arg): + raise AssertionError("Expected '{0}' to be a valid Python identifier".format(arg)) + return ast.FunctionDef( + name=self.func_name, + args=ast.arguments( + args=([ast.arg(arg=arg_name, annotation=None, + **DEFAULT_AST_ARGS) + for arg_name in self.args]), + vararg=None, + kwonlyargs=[], + kw_defaults=[], + kwarg=None, + defaults=[], + **DEFAULT_AST_ARGS), + body=self.body.as_ast_list(allow_empty=False), + decorator_list=[], + **DEFAULT_AST_ARGS) + + def add_return(self, value): + self.body.add_return(value) + + def simplify(self, changes, simplifier): + self.body = self.body.simplify(changes, simplifier) + return simplifier(self, changes) + + +class Return(Statement, PythonAst): + def __init__(self, value): + self.value = value + + def as_ast(self): + return ast.Return(self.value.as_ast(), 
**DEFAULT_AST_ARGS) + + def simplify(self, changes, simplifier): + self.value = self.value.simplify(changes, simplifier) + return simplifier(self, changes) + + def __repr__(self): + return 'Return({0}'.format(repr(self.value)) + + +class If(Statement, PythonAstList): + def __init__(self, parent_scope, parent_block=None): + self.if_blocks = [] + self._conditions = [] + self.parent_block = parent_block + self.else_block = Block(parent_scope, parent_block=self.parent_block) + self._parent_scope = parent_scope + + def add_if(self, condition): + new_if = Block(self._parent_scope, parent_block=self.parent_block) + self.if_blocks.append(new_if) + self._conditions.append(condition) + return new_if + + # We implement as_ast_list here to allow us to return a list of statements + # in some cases. + def as_ast_list(self, allow_empty=True): + if len(self.if_blocks) == 0: + return self.else_block.as_ast_list(allow_empty=allow_empty) + if_ast = ast.If(orelse=[], **DEFAULT_AST_ARGS) + current_if = if_ast + previous_if = None + for condition, if_block in zip(self._conditions, self.if_blocks): + current_if.test = condition.as_ast() + current_if.body = if_block.as_ast_list() + if previous_if is not None: + previous_if.orelse.append(current_if) + + previous_if = current_if + current_if = ast.If(orelse=[], **DEFAULT_AST_ARGS) + + if self.else_block.statements: + previous_if.orelse = self.else_block.as_ast_list() + + return [if_ast] + + def simplify(self, changes, simplifier): + self.if_blocks = [block.simplify(changes, simplifier) for block in self.if_blocks] + self._conditions = [expr.simplify(changes, simplifier) for expr in self._conditions] + self.else_block = self.else_block.simplify(changes, simplifier) + if not self.if_blocks: + # Unusual case of no conditions, only default case, but it + # simplifies other code to be able to handle this uniformly. We can + # replace this if statement with a single unconditional block. 
+ changes.append(True) + return simplifier(self.else_block, changes) + return simplifier(self, changes) + + +class Try(Statement, PythonAst): + def __init__(self, catch_exceptions, parent_scope): + self.catch_exceptions = catch_exceptions + self.try_block = Block(parent_scope) + self.except_block = Block(parent_scope) + self.else_block = Block(parent_scope) + + def as_ast(self): + return ast.Try( + body=self.try_block.as_ast_list(allow_empty=False), + handlers=[ast.ExceptHandler( + type=(self.catch_exceptions[0].as_ast() + if len(self.catch_exceptions) == 1 else + ast.Tuple(elts=[e.as_ast() for e in self.catch_exceptions], + ctx=ast.Load(), + **DEFAULT_AST_ARGS)), + name=None, + body=self.except_block.as_ast_list(allow_empty=False), + **DEFAULT_AST_ARGS)], + orelse=self.else_block.as_ast_list(allow_empty=True), + finalbody=[], + **DEFAULT_AST_ARGS) + + def has_assignment_for_name(self, name): + if ((self.try_block.has_assignment_for_name(name) or self.else_block.has_assignment_for_name(name)) and + self.except_block.has_assignment_for_name(name)): + return True + return False + + def simplify(self, changes, simplifier): + self.catch_exceptions = [e.simplify(changes, simplifier) for e in self.catch_exceptions] + self.try_block = self.try_block.simplify(changes, simplifier) + self.except_block = self.except_block.simplify(changes, simplifier) + self.else_block = self.else_block.simplify(changes, simplifier) + return simplifier(self, changes) + + +class Expression(PythonAst): + # type represents the Python type this expression will produce, + # if we know it (UNKNOWN_TYPE otherwise). 
+ type = UNKNOWN_TYPE + + +class String(Expression): + type = text_type + + def __init__(self, string_value): + self.string_value = string_value + + def as_ast(self): + return ast.Str(self.string_value, **DEFAULT_AST_ARGS) + + def __repr__(self): + return 'String({0})'.format(repr(self.string_value)) + + def __eq__(self, other): + return isinstance(other, String) and other.string_value == self.string_value + + if sys.version_info < (3,): + # Python 2 does not implement __ne__ based on __eq__ + def __ne__(self, other): + return not self == other + + +class Number(Expression): + def __init__(self, number): + self.number = number + self.type = type(number) + + def as_ast(self): + return ast.Num(n=self.number, **DEFAULT_AST_ARGS) + + def __repr__(self): + return 'Number({0})'.format(repr(self.number)) + + +class List(Expression): + def __init__(self, items): + self.items = items + self.type = list + + def as_ast(self): + return ast.List( + elts=[i.as_ast() for i in self.items], + ctx=ast.Load(), + **DEFAULT_AST_ARGS) + + def simplify(self, changes, simplifier): + self.items = [item.simplify(changes, simplifier) for item in self.items] + return simplifier(self, changes) + + +class Dict(Expression): + def __init__(self, pairs): + # pairs is a list of key-value pairs (PythonAst object, PythonAst object) + self.pairs = pairs + self.type = dict + + def as_ast(self): + return ast.Dict(keys=[k.as_ast() for k, v in self.pairs], + values=[v.as_ast() for k, v in self.pairs], + **DEFAULT_AST_ARGS) + + def simplify(self, changes, simplifier): + self.pairs = [(k.simplify(changes, simplifier), v.simplify(changes, simplifier)) + for k, v in self.pairs] + return simplifier(self, changes) + + +class StringJoin(Expression): + type = text_type + + def __init__(self, parts): + self.parts = parts + + def as_ast(self): + return MethodCall(String(''), 'join', + [List(self.parts)], + expr_type=self.type).as_ast() + + def simplify(self, changes, simplifier): + # Simplify sub parts + self.parts 
= [part.simplify(changes, simplifier) for part in self.parts] + + # Merge adjacent String objects. + new_parts = [] + for part in self.parts: + if (len(new_parts) > 0 and + isinstance(new_parts[-1], String) and + isinstance(part, String)): + new_parts[-1] = String(new_parts[-1].string_value + + part.string_value) + else: + new_parts.append(part) + if len(new_parts) < len(self.parts): + changes.append(True) + self.parts = new_parts + + # See if we can eliminate the StringJoin altogether + if len(self.parts) == 0: + changes.append(True) + return simplifier(String(''), changes) + if len(self.parts) == 1: + changes.append(True) + return simplifier(self.parts[0], changes) + return simplifier(self, changes) + + def __repr__(self): + return 'StringJoin([{0}])'.format(', '.join(repr(p) for p in self.parts)) + + +class VariableReference(Expression): + def __init__(self, name, scope): + if name not in scope.names_in_use(): + raise AssertionError("Cannot refer to undefined variable '{0}'".format(name)) + self.name = name + self.type = scope.get_name_properties(name).get(PROPERTY_TYPE, UNKNOWN_TYPE) + + def as_ast(self): + if not allowable_name(self.name, allow_builtin=True): + raise AssertionError("Expected {0} to be a valid Python identifier".format(self.name)) + return ast.Name(id=self.name, ctx=ast.Load(), **DEFAULT_AST_ARGS) + + def __eq__(self, other): + return type(other) == type(self) and other.name == self.name + + def __repr__(self): + return 'VariableReference({0})'.format(repr(self.name)) + + +class FunctionCall(Expression): + def __init__(self, function_name, args, kwargs, scope, expr_type=UNKNOWN_TYPE): + if function_name not in scope.names_in_use(): + raise AssertionError("Cannot call unknown function '{0}'".format(function_name)) + self.function_name = function_name + self.args = args + self.kwargs = kwargs + if expr_type is UNKNOWN_TYPE: + # Try to find out automatically + expr_type = scope.get_name_properties(function_name).get(PROPERTY_RETURN_TYPE, 
expr_type) + self.type = expr_type + + def as_ast(self): + if not allowable_name(self.function_name, allow_builtin=True): + raise AssertionError("Expected {0} to be a valid Python identifier or builtin".format(self.function_name)) + + if self.function_name in SENSITIVE_FUNCTIONS: + raise AssertionError("Disallowing call to '{0}'".format(self.function_name)) + + for name in self.kwargs.keys(): + if not allowable_keyword_arg_name(name): + raise AssertionError("Expected {0} to be a valid Fluent NamedArgument name".format(name)) + + if any(not allowable_name(name) for name in self.kwargs.keys()): + # `my_function(**{})` syntax + kwarg_pairs = list(sorted(self.kwargs.items())) + kwarg_names, kwarg_values = [k for k, v in kwarg_pairs], [v for k, v in kwarg_pairs] + return ast.Call( + func=ast.Name(id=self.function_name, ctx=ast.Load(), **DEFAULT_AST_ARGS), + args=[arg.as_ast() for arg in self.args], + keywords=[ast.keyword(arg=None, + value=ast.Dict(keys=[ast.Str(k, **DEFAULT_AST_ARGS) + for k in kwarg_names], + values=[v.as_ast() for v in kwarg_values], + **DEFAULT_AST_ARGS), + **DEFAULT_AST_ARGS)], + **DEFAULT_AST_ARGS) + + # Normal `my_function(kwarg=foo)` syntax + return ast.Call( + func=ast.Name(id=self.function_name, ctx=ast.Load(), **DEFAULT_AST_ARGS), + args=[arg.as_ast() for arg in self.args], + keywords=[ast.keyword(arg=name, value=value.as_ast(), **DEFAULT_AST_ARGS) + for name, value in self.kwargs.items()], + **DEFAULT_AST_ARGS) + + def simplify(self, changes, simplifier): + self.args = [arg.simplify(changes, simplifier) for arg in self.args] + self.kwargs = {name: val.simplify(changes, simplifier) for name, val in self.kwargs.items()} + return simplifier(self, changes) + + def __repr__(self): + return 'FunctionCall({0}, {1}, {2})'.format(self.function_name, self.args, self.kwargs) + + +class MethodCall(Expression): + def __init__(self, obj, method_name, args, expr_type=UNKNOWN_TYPE): + # We can't check method_name because we don't know the type of obj yet. 
+ self.obj = obj + self.method_name = method_name + self.args = args + self.type = expr_type + + def as_ast(self): + if not allowable_name(self.method_name, for_method=True): + raise AssertionError("Expected {0} to be a valid Python identifier".format(self.method_name)) + return ast.Call( + func=ast.Attribute(value=self.obj.as_ast(), + attr=self.method_name, + ctx=ast.Load(), + **DEFAULT_AST_ARGS), + args=[arg.as_ast() for arg in self.args], + keywords=[], + **DEFAULT_AST_ARGS) + + def simplify(self, changes, simplifier): + self.obj = self.obj.simplify(changes, simplifier) + self.args = [arg.simplify(changes, simplifier) for arg in self.args] + return simplifier(self, changes) + + def __repr__(self): + return 'MethodCall({0}, {1}, {2})'.format(repr(self.obj), + repr(self.method_name), + repr(self.args)) + + +class DictLookup(Expression): + def __init__(self, lookup_obj, lookup_arg, expr_type=UNKNOWN_TYPE): + self.lookup_obj = lookup_obj + self.lookup_arg = lookup_arg + self.type = expr_type + + def as_ast(self): + return ast.Subscript( + value=self.lookup_obj.as_ast(), + slice=ast.Index(value=self.lookup_arg.as_ast(), **DEFAULT_AST_ARGS), + ctx=ast.Load(), + **DEFAULT_AST_ARGS) + + def simplify(self, changes, simplifier): + self.lookup_obj = self.lookup_obj.simplify(changes, simplifier) + self.lookup_arg = self.lookup_arg.simplify(changes, simplifier) + return simplifier(self, changes) + + +ObjectCreation = FunctionCall + + +class NoneExpr(Expression): + type = type(None) + + def as_ast(self): + return ast.NameConstant( + value=None, + **DEFAULT_AST_ARGS) + + +class BinaryOperator(Expression): + def __init__(self, left, right): + self.left = left + self.right = right + + def simplify(self, changes, simplifier): + self.left = self.left.simplify(changes, simplifier) + self.right = self.right.simplify(changes, simplifier) + return simplifier(self, changes) + + +class Equals(BinaryOperator): + type = bool + + def as_ast(self): + return ast.Compare( + 
left=self.left.as_ast(), + comparators=[self.right.as_ast()], + ops=[ast.Eq()], + **DEFAULT_AST_ARGS) + + +class BoolOp(BinaryOperator): + type = bool + op = NotImplemented + + def as_ast(self): + return ast.BoolOp( + op=self.op(), values=[self.left.as_ast(), + self.right.as_ast()], + **DEFAULT_AST_ARGS) + + +class Or(BoolOp): + op = ast.Or + + +def simplify(codegen_ast, simplifier=None): + if simplifier is None: + def simplifier(n, changes): + return n + changes = [True] + while any(changes): + changes = [] + codegen_ast = codegen_ast.simplify(changes, simplifier) + return codegen_ast diff --git a/fluent.runtime/fluent/runtime/compiler.py b/fluent.runtime/fluent/runtime/compiler.py new file mode 100644 index 0000000..e89537e --- /dev/null +++ b/fluent.runtime/fluent/runtime/compiler.py @@ -0,0 +1,1016 @@ +from __future__ import absolute_import, unicode_literals + +import contextlib +from collections import OrderedDict + +import attr +import babel +import six + +from fluent.syntax.ast import (Attribute, BaseNode, Identifier, Message, FunctionReference, + MessageReference, NumberLiteral, Pattern, Placeable, SelectExpression, StringLiteral, + Term, TermReference, TextElement, VariableReference) + +from . import codegen, runtime +from .errors import FluentCyclicReferenceError, FluentFormatError, FluentReferenceError +from .types import FluentDateType, FluentNone, FluentNumber, FluentType +from .utils import (ATTRIBUTE_SEPARATOR, TERM_SIGIL, args_match, + attribute_ast_to_id, inspect_function_args, reference_to_id, + unknown_reference_error_obj) + +try: + from functools import singledispatch +except ImportError: + # Python < 3.4 + from singledispatch import singledispatch + +text_type = six.text_type + +# Unicode bidi isolation characters. 
+FSI = "\u2068" +PDI = "\u2069" + +BUILTIN_NUMBER = 'NUMBER' +BUILTIN_DATETIME = 'DATETIME' +BUILTIN_RETURN_TYPES = { + BUILTIN_NUMBER: FluentNumber, + BUILTIN_DATETIME: FluentDateType, +} + +# Function argument and global names:: +MESSAGE_ARGS_NAME = "message_args" +ERRORS_NAME = "errors" +MESSAGE_FUNCTION_ARGS = [MESSAGE_ARGS_NAME, ERRORS_NAME] +LOCALE_NAME = "locale" +PLURAL_FORM_FOR_NUMBER_NAME = 'plural_form_for_number' + +CLDR_PLURAL_FORMS = set([ + 'zero', + 'one', + 'two', + 'few', + 'many', + 'other', +]) +PROPERTY_EXTERNAL_ARG = 'PROPERTY_EXTERNAL_ARG' + + +@attr.s +class CurrentEnvironment(object): + # The parts of CompilerEnvironment that we want to mutate (and restore) + # temporarily for some parts of a call chain. + message_id = attr.ib(default=None) + term_args = attr.ib(default=None) + in_select_expression = attr.ib(default=False) + + +@attr.s +class CompilerEnvironment(object): + locale = attr.ib() + plural_form_function = attr.ib() + use_isolating = attr.ib() + message_mapping = attr.ib(factory=dict) + errors = attr.ib(factory=list) + functions = attr.ib(factory=dict) + function_renames = attr.ib(factory=dict) + functions_arg_spec = attr.ib(factory=dict) + message_ids_to_ast = attr.ib(factory=dict) + term_ids_to_ast = attr.ib(factory=dict) + current = attr.ib(factory=CurrentEnvironment) + + def add_current_message_error(self, error): + self.errors.append((self.current.message_id, error)) + + @contextlib.contextmanager + def modified(self, **replacements): + """ + Context manager that modifies the 'current' attribute of the + environment, restoring the old data at the end. + """ + # CurrentEnvironment only has immutable args at the moment, so the + # shallow copy returned by attr.evolve is fine. 
+ old_current = self.current + self.current = attr.evolve(old_current, **replacements) + yield self + self.current = old_current + + def modified_for_term_reference(self, term_args=None): + return self.modified(term_args=term_args if term_args is not None else {}) + + +def compile_messages(messages, locale, use_isolating=True, functions=None): + """ + Compile a dictionary of {id: Message/Term objects} to a Python module, + and returns a tuple: + (dictionary mapping the message IDs to Python functions, + error list) + + The error list is itself a list of two tuples: + (message id, exception object) + """ + if functions is None: + functions = {} + module, message_mapping, module_globals, errors = messages_to_module( + messages, locale, + use_isolating=use_isolating, + functions=functions) + + code_obj = compile(module.as_ast(), '', 'exec') + exec(code_obj, module_globals) + retval = {} + for key, val in message_mapping.items(): + if key.startswith(TERM_SIGIL): + # term, shouldn't be in publicly available messages + continue + retval[six.text_type(key)] = module_globals[val] + + return (retval, errors) + + +def messages_to_module(messages, locale, use_isolating=True, functions=None): + """ + Compile a set of {id: Message/Term objects} to a Python module, returning a tuple: + (codegen.Module object, dictionary mapping message IDs to Python functions, + module globals dictionary, errors list) + """ + if functions is None: + functions = {} + + message_ids_to_ast = OrderedDict(get_message_function_ast(messages)) + term_ids_to_ast = OrderedDict(get_term_ast(messages)) + + # Plural form function + plural_form_for_number_main = babel.plural.to_python(locale.plural_form) + + def plural_form_for_number(number): + try: + return plural_form_for_number_main(number) + except TypeError: + # This function can legitimately be passed strings if we incorrectly + # guessed it was a CLDR category. 
So we ignore silently + return None + + function_arg_errors = [] + compiler_env = CompilerEnvironment( + locale=locale, + plural_form_function=plural_form_for_number, + use_isolating=use_isolating, + functions=functions, + functions_arg_spec={name: inspect_function_args(func, name, function_arg_errors) + for name, func in functions.items()}, + message_ids_to_ast=message_ids_to_ast, + term_ids_to_ast=term_ids_to_ast, + ) + for err in function_arg_errors: + compiler_env.add_current_message_error(err) + + # Setup globals, and reserve names for them + module_globals = { + k: getattr(runtime, k) for k in runtime.__all__ + } + module_globals.update(six.moves.builtins.__dict__) + module_globals[LOCALE_NAME] = locale + + # Return types of known functions. + known_return_types = {} + known_return_types.update(BUILTIN_RETURN_TYPES) + known_return_types.update(runtime.RETURN_TYPES) + + module_globals[PLURAL_FORM_FOR_NUMBER_NAME] = plural_form_for_number + known_return_types[PLURAL_FORM_FOR_NUMBER_NAME] = text_type + + def get_name_properties(name): + properties = {} + if name in known_return_types: + properties[codegen.PROPERTY_RETURN_TYPE] = known_return_types[name] + return properties + + module = codegen.Module() + for k in module_globals: + name = module.scope.reserve_name( + k, + properties=get_name_properties(k), + is_builtin=k in six.moves.builtins.__dict__ + ) + # We should have chosen all our module_globals to avoid name conflicts: + assert name == k, "Expected {0}=={1}".format(name, k) + + # Reserve names for function arguments, so that we always + # know the name of these arguments without needing to do + # lookups etc. 
+ for arg in MESSAGE_FUNCTION_ARGS: + module.scope.reserve_function_arg_name(arg) + + # -- User defined names + # functions from context + for name, func in functions.items(): + # These might clash, because we can't control what the user passed in, + # so we make a record in 'function_renames' + assigned_name = module.scope.reserve_name(name, properties=get_name_properties(name)) + compiler_env.function_renames[name] = assigned_name + module_globals[assigned_name] = func + + # Pass one, find all the names, so that we can populate message_mapping, + # which is needed for compilation. + for msg_id, msg in message_ids_to_ast.items(): + function_name = module.scope.reserve_name( + message_function_name_for_msg_id(msg_id), + properties={codegen.PROPERTY_RETURN_TYPE: text_type} + ) + compiler_env.message_mapping[msg_id] = function_name + + # Pass 2, actual compilation + for msg_id, msg in message_ids_to_ast.items(): + with compiler_env.modified(message_id=msg_id): + function_name = compiler_env.message_mapping[msg_id] + function = compile_message(msg, msg_id, function_name, module, compiler_env) + module.add_function(function_name, function) + + module = codegen.simplify(module, Simplifier(compiler_env)) + return (module, compiler_env.message_mapping, module_globals, compiler_env.errors) + + +def get_message_function_ast(message_dict): + for msg_id, msg in message_dict.items(): + if isinstance(msg, Term): + continue + if msg.value is not None: # has a body + yield (msg_id, msg) + for attribute in msg.attributes: + yield (attribute_ast_to_id(attribute, msg), attribute) + + +def get_term_ast(message_dict): + for term_id, term in message_dict.items(): + if isinstance(term, Message): + pass + if term.value is not None: # has a body + yield (term_id, term) + + for attribute in term.attributes: + yield (attribute_ast_to_id(attribute, term), attribute) + + +def message_function_name_for_msg_id(msg_id): + # Scope.reserve_name does further sanitising of name, which we don't need 
to + # worry about. + return msg_id.replace(ATTRIBUTE_SEPARATOR, '__').replace('-', '_') + + +def compile_message(msg, msg_id, function_name, module, compiler_env): + msg_func = codegen.Function(parent_scope=module.scope, + name=function_name, + args=MESSAGE_FUNCTION_ARGS) + function_block = msg_func.body + if contains_reference_cycle(msg, compiler_env): + error = FluentCyclicReferenceError("Cyclic reference in {0}".format(msg_id)) + add_static_msg_error(function_block, error) + compiler_env.add_current_message_error(error) + return_expression = finalize_expr_as_string(make_fluent_none(None, module.scope), function_block, compiler_env) + else: + return_expression = compile_expr(msg, function_block, compiler_env) + # > return $return_expression + msg_func.add_return(return_expression) + return msg_func + + +def traverse_ast(node, fun, exclude_attributes=None): + """ + Postorder-traverse this node and apply `fun` to all child nodes. + + exclude_attributes is a list of (node type, attribute name) tuples + that should not be recursed into. + """ + + def visit(value): + """Call `fun` on `value` and its descendants.""" + if isinstance(value, BaseNode): + return traverse_ast(value, fun, exclude_attributes=exclude_attributes) + if isinstance(value, list): + return fun(list(map(visit, value))) + return fun(value) + + # Use all attributes found on the node + parts = vars(node).items() + for name, value in parts: + if exclude_attributes is not None and (type(node), name) in exclude_attributes: + continue + visit(value) + + return fun(node) + + +def contains_reference_cycle(msg, compiler_env): + """ + Returns True if the message 'msg' contains a cyclic reference, + in the context of the other messages provided in compiler_env + """ + # We traverse the AST starting from message, jumping to other messages and + # terms as necessary, and seeing if a path through the AST loops back to + # previously visited nodes at any point. 
+ + # This algorithm has some bugs compared to the runtime method in resolver.py + # For example, a pair of conditionally mutually recursive messages: + + # foo = Foo { $arg -> + # [left] { bar } + # *[right] End + # } + + # bar = Bar { $arg -> + # *[left] End + # [right] { foo } + # } + + # These messages are rejected as containing cycles by this checker, when in + # fact they cannot go into an infinite loop, and the resolver correctly + # executes them. + + # It is pretty difficult to come up with a compelling use case + # for this kind of thing though... so we are not too worried + # about fixing this bug, since we are erring on the conservative side. + + message_ids_to_ast = compiler_env.message_ids_to_ast + term_ids_to_ast = compiler_env.term_ids_to_ast + + # We exclude recursing into certain attributes, because we already cover + # these recursions explicitly by jumping to a subnode for the case of + # references. + exclude_attributes = [ + # Message and Term attributes have already been loaded into the message_ids_to_ast dict, + (Message, 'attributes'), + (Term, 'attributes'), + + # for speed + (Message, 'comment'), + (Term, 'comment'), + ] + + # We need to keep track of visited nodes. If we use just a single set for + # each top level message, then things like this would be rejected: + # + # message = { -term } { -term } + # + # because we would visit the term twice. + # + # So we have a stack of sets: + visited_node_stack = [set([])] + # The top of this stack represents the set of nodes in the current path of + # visited nodes. We push a copy of the top set onto the stack when we + # traverse into a sub-node, and pop it off when we come back. 
+ + checks = [] + + def checker(node): + if isinstance(node, BaseNode): + node_id = id(node) + if node_id in visited_node_stack[-1]: + checks.append(True) + return + visited_node_stack[-1].add(node_id) + else: + return + + # The logic below duplicates the logic that is used for 'jumping' to + # different nodes (messages via a runtime function call, terms via + # inlining), including the fallback strategies that are used. + sub_node = None + if isinstance(node, (MessageReference, TermReference)): + ref_id = reference_to_id(node) + if ref_id in message_ids_to_ast: + sub_node = message_ids_to_ast[ref_id] + elif ref_id in term_ids_to_ast: + sub_node = term_ids_to_ast[ref_id] + elif node.attribute: + # No match for attribute, but compiler falls back to parent ref + # in this situation, so we have to as well. + parent_ref_id = reference_to_id(node, ignore_attributes=True) + if parent_ref_id in message_ids_to_ast: + sub_node = message_ids_to_ast[parent_ref_id] + elif parent_ref_id in term_ids_to_ast: + sub_node = term_ids_to_ast[parent_ref_id] + + if sub_node is not None: + visited_node_stack.append(visited_node_stack[-1].copy()) + traverse_ast(sub_node, checker, exclude_attributes=exclude_attributes) + if any(checks): + return + visited_node_stack.pop() + + return + + traverse_ast(msg, checker, exclude_attributes=exclude_attributes) + return any(checks) + + +# Begin 'compile_expr' implementation + +@singledispatch +def compile_expr(element, block, compiler_env): + """ + Compiles a Fluent expression into a Python one, return + an object of type codegen.Expression. + + This may also add statements into block, which is assumed + to be a function that returns a message, or a branch of that + function. 
+ """ + raise NotImplementedError("Cannot handle object of type {0}" + .format(type(element).__name__)) + + +@compile_expr.register(Message) +def compile_expr_message(message, block, compiler_env): + return compile_expr(message.value, block, compiler_env) + + +@compile_expr.register(Term) +def compile_expr_term(term, block, compiler_env): + return compile_expr(term.value, block, compiler_env) + + +@compile_expr.register(Attribute) +def compile_expr_attribute(attribute, block, compiler_env): + return compile_expr(attribute.value, block, compiler_env) + + +@compile_expr.register(Pattern) +def compile_expr_pattern(pattern, block, compiler_env): + parts = [] + subelements = pattern.elements + + use_isolating = compiler_env.use_isolating and len(subelements) > 1 + + for element in pattern.elements: + wrap_this_with_isolating = use_isolating and not isinstance(element, TextElement) + if wrap_this_with_isolating: + parts.append(codegen.String(FSI)) + parts.append(compile_expr(element, block, compiler_env)) + if wrap_this_with_isolating: + parts.append(codegen.String(PDI)) + + # > ''.join($[p for p in parts]) + return codegen.StringJoin([finalize_expr_as_string(p, block, compiler_env) for p in parts]) + + +@compile_expr.register(TextElement) +def compile_expr_text(text, block, compiler_env): + return codegen.String(text.value) + + +@compile_expr.register(StringLiteral) +def compile_expr_string_expression(expr, block, compiler_env): + return codegen.String(expr.parse()['value']) + + +@compile_expr.register(NumberLiteral) +def compile_expr_number_expression(expr, block, compiler_env): + number_expr = codegen.Number(numeric_to_native(expr.value)) + # > NUMBER($number_expr) + return codegen.FunctionCall(BUILTIN_NUMBER, + [number_expr], + {}, + block.scope) + + +@compile_expr.register(Placeable) +def compile_expr_placeable(placeable, block, compiler_env): + return compile_expr(placeable.expression, block, compiler_env) + + +@compile_expr.register(MessageReference) +def 
compile_expr_message_reference(reference, block, compiler_env): + return handle_message_reference(reference, block, compiler_env) + + +def compile_term(term, block, compiler_env, term_args=None): + with compiler_env.modified_for_term_reference(term_args=term_args): + return compile_expr(term.value, block, compiler_env) + + +@compile_expr.register(TermReference) +def compile_expr_term_reference(reference, block, compiler_env): + term, err_obj = lookup_term_reference(reference, block, compiler_env) + if term is None: + return err_obj + if reference.arguments: + args = [compile_expr(arg, block, compiler_env) + for arg in reference.arguments.positional] + kwargs = {kwarg.name.name: compile_expr(kwarg.value, block, compiler_env) + for kwarg in reference.arguments.named} + + if args: + args_err = FluentFormatError("Ignored positional arguments passed to term '{0}'" + .format(reference_to_id(reference))) + add_static_msg_error(block, args_err) + compiler_env.add_current_message_error(args_err) + else: + kwargs = None + + return compile_term(term, block, compiler_env, term_args=kwargs) + + +@compile_expr.register(SelectExpression) +def compile_expr_select_expression(select_expr, block, compiler_env): + with compiler_env.modified(in_select_expression=True): + key_value = compile_expr(select_expr.selector, block, compiler_env) + static_retval = resolve_select_expression_statically(select_expr, key_value, block, compiler_env) + if static_retval is not None: + return static_retval + + if_statement = codegen.If(block.scope, parent_block=block) + key_tmp_name = reserve_and_assign_name(block, '_key', key_value) + + return_tmp_name = block.scope.reserve_name('_ret') + + need_plural_form = any(is_cldr_plural_form_key(variant.key) + for variant in select_expr.variants) + if need_plural_form: + plural_form_value = codegen.FunctionCall(PLURAL_FORM_FOR_NUMBER_NAME, + [block.scope.variable(key_tmp_name)], + {}, + block.scope) + # > $plural_form_tmp_name = 
plural_form_for_number($key_tmp_name) + plural_form_tmp_name = reserve_and_assign_name(block, '_plural_form', plural_form_value) + + assigned_types = [] + first = True + for variant in select_expr.variants: + if variant.default: + # This is the default, so gets chosen if nothing else matches, or + # there was no requested variant. Therefore we use the final 'else' + # block with no condition. + cur_block = if_statement.else_block + else: + # For cases like: + # { $arg -> + # [one] X + # [other] Y + # } + # we can't be sure whether $arg is a string, and the 'one' and 'other' + # keys are just strings, or whether $arg is a number and we need to + # do a plural category comparison. So we have to do both. We can use equality + # checks because they implicitly do a type check + # > $key_tmp_name == $variant.key + condition1 = codegen.Equals(block.scope.variable(key_tmp_name), + compile_expr(variant.key, block, compiler_env)) + + if is_cldr_plural_form_key(variant.key): + # > $plural_form_tmp_name == $variant.key + condition2 = codegen.Equals(block.scope.variable(plural_form_tmp_name), + compile_expr(variant.key, block, compiler_env)) + condition = codegen.Or(condition1, condition2) + else: + condition = condition1 + cur_block = if_statement.add_if(condition) + assigned_value = compile_expr(variant.value, cur_block, compiler_env) + cur_block.add_assignment(return_tmp_name, assigned_value, allow_multiple=not first) + first = False + assigned_types.append(assigned_value.type) + + if assigned_types: + first_type = assigned_types[0] + if all(t == first_type for t in assigned_types): + block.scope.set_name_properties(return_tmp_name, {codegen.PROPERTY_TYPE: first_type}) + + block.add_statement(if_statement) + return block.scope.variable(return_tmp_name) + + +@compile_expr.register(Identifier) +def compile_expr_variant_name(name, block, compiler_env): + # TODO - handle numeric literals here? 
+ return codegen.String(name.name) + + +@compile_expr.register(VariableReference) +def compile_expr_variable_reference(argument, block, compiler_env): + name = argument.id.name + if compiler_env.current.term_args is not None: + # We are in a term, all args are passed explicitly, not inherited from + # external args. + if name in compiler_env.current.term_args: + return compiler_env.current.term_args[name] + return make_fluent_none(name, block.scope) + + # Otherwise we are in a message, lookup at runtime. + + # We might have already looked it up: + existing = block.scope.find_names_by_property(PROPERTY_EXTERNAL_ARG, name) + # Name reservation is done at scope level. We also need to check that it has + # been defined in this block, or a parent block to this one. + if existing and block.has_assignment_for_name(existing[0]): + arg_tmp_name = existing[0] + else: + arg_tmp_name = block.scope.reserve_name('_arg', properties={PROPERTY_EXTERNAL_ARG: name}) + + # Arguments we get out of the args dictionary should be wrapped + # into 'native' Fluent types using `handle_argument`. + # Except, in a select expression, we only care about matching against a selector, so + # don't need to do this wrapping + wrap_with_handle_argument = not compiler_env.current.in_select_expression + if wrap_with_handle_argument: + arg_handled_tmp_name = block.scope.reserve_name('_arg_h') + handle_argument_func_call = codegen.FunctionCall( + "handle_argument", + [block.scope.variable(arg_tmp_name), + codegen.String(name), + block.scope.variable(LOCALE_NAME), + block.scope.variable(ERRORS_NAME)], + {}, + block.scope) + + if block.scope.has_assignment(arg_tmp_name): # already assigned to this, can re-use + if not wrap_with_handle_argument: + return block.variable(arg_tmp_name) + + block.add_assignment( + arg_handled_tmp_name, + handle_argument_func_call) + return block.scope.variable(arg_handled_tmp_name) + + # Add try/except/else to lookup variable. 
+ try_except = codegen.Try([block.scope.variable("LookupError"), + block.scope.variable("TypeError") # for when args=None + ], + block.scope) + block.add_statement(try_except) + + # Try block + # > $arg_tmp_name = message_args[$name] + try_except.try_block.add_assignment( + arg_tmp_name, + codegen.DictLookup(block.scope.variable(MESSAGE_ARGS_NAME), + codegen.String(name))) + # Except block + add_static_msg_error(try_except.except_block, + FluentReferenceError("Unknown external: {0}".format(name))) + # > $arg_tmp_name = FluentNone("$name") + try_except.except_block.add_assignment( + arg_tmp_name, + make_fluent_none(name, block.scope), + allow_multiple=True) + + if not wrap_with_handle_argument: + return block.scope.variable(arg_tmp_name) + + # We can use except/else blocks to do wrapping. + # Except block: + # We don't want to add 'handle_argument' round FluentNone instances, + # it does the wrong thing. + # > $arg_handled_tmp_name = $arg_tmp_name + try_except.except_block.add_assignment( + arg_handled_tmp_name, + block.scope.variable(arg_tmp_name)) + + # else block: + # > $handled_tmp_name = handle_argument($arg_tmp_name, "$name", locale, errors) + try_except.else_block.add_assignment( + arg_handled_tmp_name, + handle_argument_func_call, + allow_multiple=True) + + return block.scope.variable(arg_handled_tmp_name) + + +@compile_expr.register(FunctionReference) +def compile_expr_function_reference(expr, block, compiler_env): + args = [compile_expr(arg, block, compiler_env) for arg in expr.arguments.positional] + kwargs = {kwarg.name.name: compile_expr(kwarg.value, block, compiler_env) for kwarg in expr.arguments.named} + + # builtin or custom function + function_name = expr.id.name + + if function_name in compiler_env.functions: + match, sanitized_args, sanitized_kwargs, errors = args_match(function_name, args, kwargs, + compiler_env.functions_arg_spec[function_name]) + for error in errors: + add_static_msg_error(block, error) + 
compiler_env.add_current_message_error(error) + + if match: + function_name_in_module = compiler_env.function_renames[function_name] + return codegen.FunctionCall(function_name_in_module, sanitized_args, sanitized_kwargs, block.scope) + return make_fluent_none(function_name + "()", block.scope) + + error = FluentReferenceError("Unknown function: {0}" + .format(function_name)) + add_static_msg_error(block, error) + compiler_env.add_current_message_error(error) + return make_fluent_none(function_name + "()", block.scope) + + # if isinstance(expr.callee, (TermReference, AttributeExpression)): + # if args: + # args_err = FluentFormatError("Ignored positional arguments passed to term '{0}'" + # .format(reference_to_id(expr.callee))) + # add_static_msg_error(block, args_err) + # compiler_env.add_current_message_error(args_err) + + # term, err = lookup_term_reference(expr.callee, block, compiler_env) + # if term is None: + # return err + # return compile_term(term, block, compiler_env, term_args=kwargs) + + +# End compile_expr implementations + +# Compiler utilities and common code: + +def add_msg_error_with_expr(block, exception_expr): + block.add_statement( + codegen.MethodCall( + block.scope.variable(ERRORS_NAME), + "append", + [exception_expr])) + + +def add_static_msg_error(block, exception): + """ + Given a block and an exception object, inspect the object and add the code + to the scope needed to create and add that exception to the returned errors + list. + + """ + return add_msg_error_with_expr( + block, + codegen.ObjectCreation(exception.__class__.__name__, + [codegen.String(exception.args[0])], + {}, + block.scope)) + + +def do_message_call(msg_id, block, compiler_env): + msg_func_name = compiler_env.message_mapping[msg_id] + if compiler_env.current.term_args is not None: + # Message call from inside a term. + # We pass term args to message function, not external args. 
+ term_arg_dict = codegen.Dict([(codegen.String(k), v) + for k, v in sorted(compiler_env.current.term_args.items())]) + call_args = [term_arg_dict, block.scope.variable(ERRORS_NAME)] + else: + call_args = [block.scope.variable(a) for a in MESSAGE_FUNCTION_ARGS] + return codegen.FunctionCall(msg_func_name, + call_args, + {}, + block.scope) + + +def finalize_expr_as_string(codegen_ast, block, compiler_env): + """ + Wrap an outputted Python expression with code to ensure that it will return + a string. + """ + if issubclass(codegen_ast.type, text_type): + return codegen_ast + if issubclass(codegen_ast.type, FluentType): + # > $codegen_ast.format(locale) + return codegen.MethodCall(codegen_ast, + 'format', + [block.scope.variable(LOCALE_NAME)], + expr_type=text_type) + + # > handle_output($codegen_ast, locale, errors) + return codegen.FunctionCall('handle_output', + [codegen_ast, + block.scope.variable(LOCALE_NAME), + block.scope.variable(ERRORS_NAME)], + {}, + block.scope, + expr_type=text_type) + + +def is_cldr_plural_form_key(key_expr): + return (isinstance(key_expr, Identifier) and + key_expr.name in CLDR_PLURAL_FORMS) + + +def is_NUMBER_call_expr(expr): + """ + Returns True if the object is a FTL ast.FunctionReference representing a call to NUMBER + """ + return (isinstance(expr, FunctionReference) and + expr.id.name == 'NUMBER') + + +def lookup_term_reference(ref, block, compiler_env): + # This could be turned into 'handle_term_reference', (similar to + # 'handle_message_reference' below) once VariantList and VariantExpression + # go away. 
+ term_id = reference_to_id(ref) + if term_id in compiler_env.term_ids_to_ast: + return compiler_env.term_ids_to_ast[term_id], None + # Fallback to parent + if ref.attribute: + parent_id = reference_to_id(ref, ignore_attributes=True) + if parent_id in compiler_env.term_ids_to_ast: + error = unknown_reference_error_obj(term_id) + add_static_msg_error(block, error) + compiler_env.add_current_message_error(error) + return compiler_env.term_ids_to_ast[parent_id], None + return None, unknown_reference(term_id, block, compiler_env) + + +def handle_message_reference(ref, block, compiler_env): + msg_id = reference_to_id(ref) + if msg_id in compiler_env.message_ids_to_ast: + return do_message_call(msg_id, block, compiler_env) + # Fallback to parent + if ref.attribute: + parent_id = reference_to_id(ref, ignore_attributes=True) + if parent_id in compiler_env.message_ids_to_ast: + error = unknown_reference_error_obj(msg_id) + add_static_msg_error(block, error) + compiler_env.add_current_message_error(error) + return do_message_call(parent_id, block, compiler_env) + return unknown_reference(msg_id, block, compiler_env) + + +def make_fluent_none(name, scope): + # > FluentNone(name) + # OR + # > FluentNone() + return codegen.ObjectCreation('FluentNone', + [codegen.String(name)] if name else [], + {}, + scope) + + +def numeric_to_native(val): + """ + Given a numeric string (as defined by fluent spec), + return an int or float + """ + # val matches this EBNF: + # '-'? [0-9]+ ('.' [0-9]+)? + if '.' in val: + return float(val) + return int(val) + + +def reserve_and_assign_name(block, suggested_name, value): + """ + Reserves a name for the value in the scope block and adds assignment if + necessary, returning the name reserved. + + May skip the assignment if not necessary. + """ + if isinstance(value, codegen.VariableReference): + # We don't need a new name, we can re-use this one. 
+ return value.name + + name = block.scope.reserve_name(suggested_name) + block.add_assignment(name, value) + return name + + +def resolve_select_expression_statically(select_expr, key_ast, block, compiler_env): + """ + Resolve a select expression statically, given a codegen.PythonAst object + `key_ast` representing the key value, or return None if not possible. + """ + # We need to 'peek' inside what we've produced so far to see if it is something + # static. To do that reliably we must simplify at this point: + key_ast = codegen.simplify(key_ast) + key_is_fluent_none = is_fluent_none(key_ast) + key_is_number = (isinstance(key_ast, codegen.Number) or + (is_NUMBER_function_call(key_ast) and isinstance(key_ast.args[0], codegen.Number))) + key_is_string = isinstance(key_ast, codegen.String) + if not (key_is_string or key_is_number or key_is_fluent_none): + return None + + if key_is_number: + if isinstance(key_ast, codegen.Number): + key_number_value = key_ast.number + else: + # peek into the number literal inside the `NUMBER` call. 
+ key_number_value = key_ast.args[0].number + + default_variant = None + found = None + for variant in select_expr.variants: + if variant.default: + default_variant = variant + if key_is_fluent_none: + found = variant + break + if key_is_string: + if (isinstance(variant.key, Identifier) and + key_ast.string_value == variant.key.name): + found = variant + break + elif key_is_number: + if (isinstance(variant.key, NumberLiteral) and + key_number_value == numeric_to_native(variant.key.value)): + found = variant + break + elif (isinstance(variant.key, Identifier) and + compiler_env.plural_form_function(key_number_value) == variant.key.name): + found = variant + break + if found is None: + found = default_variant + + return compile_expr(found.value, block, compiler_env) + + +def unknown_reference(name, block, compiler_env): + error = unknown_reference_error_obj(name) + add_static_msg_error(block, error) + compiler_env.add_current_message_error(error) + return make_fluent_none(name, block.scope) + + +# AST checking and simplification + +def is_DATETIME_function_call(codegen_ast): + return (isinstance(codegen_ast, codegen.FunctionCall) and + codegen_ast.function_name == BUILTIN_DATETIME) + + +def is_fluent_none(codegen_ast): + return (isinstance(codegen_ast, codegen.ObjectCreation) and + codegen_ast.function_name == 'FluentNone' and + (len(codegen_ast.args) == 0 or + isinstance(codegen_ast.args[0], codegen.String))) + + +def is_NUMBER_function_call(codegen_ast): + return (isinstance(codegen_ast, codegen.FunctionCall) and + codegen_ast.function_name == BUILTIN_NUMBER) + + +class Simplifier(object): + def __init__(self, compiler_env): + self.compiler_env = compiler_env + + def __call__(self, codegen_ast, changes): + # Simplifications we can do on the AST tree. This function works + # similarly to the PythonAst.simplify() methods i.e. we append to + # changes if we made a change, and either mutate codegen_ast or + # return a new/different object. 
+ + # The logic here wouldn't be appropriate to put into codegen simplify() + # methods because it is higher level and contains more logic specific to + # Fluent. + + # We match against a number of patterns: + + # NUMBER(NUMBER(...)) -> NUMBER(...) (i.e. no keyword args) + if (is_NUMBER_function_call(codegen_ast) and not codegen_ast.kwargs and + is_NUMBER_function_call(codegen_ast.args[0])): + changes.append(True) + return codegen_ast.args[0] + + # NUMBER(NUMBER(x), kwargs=...) -> NUMBER(x, kwargs=...) + if (is_NUMBER_function_call(codegen_ast) and is_NUMBER_function_call(codegen_ast.args[0]) and + not codegen_ast.args[0].kwargs): + changes.append(True) + codegen_ast.args[0] = codegen_ast.args[0].args[0] + + # Numeric literals in some function call keyword arguments don't need to be + # wrapper in NUMBER + # e.g. NUMBER(x, minimumIntegerDigits=NUMBER(1)) -> NUMBER(x, minimumIntegerDigits=1) + # DATETIME(x, hour12=NUMBER(1)) -> DATETIME(x, hour12=1) + # We can't be sure for other custom functions, it depends how the args are used. + if (is_DATETIME_function_call(codegen_ast) or is_NUMBER_function_call(codegen_ast)) and codegen_ast.kwargs: + for kwarg_name, kwarg_value in list(codegen_ast.kwargs.items()): + if is_NUMBER_function_call(kwarg_value) and not kwarg_value.kwargs: + codegen_ast.kwargs[kwarg_name] = kwarg_value.args[0] + changes.append(True) + + # Numeric literals used in comparisons (select expressions) don't need to be wrapped + # in NUMBER(), because FluentNumber and int/float compare in the same way. 
+ # x == NUMBER(y) -> x == y + if (isinstance(codegen_ast, codegen.Equals) and + is_NUMBER_function_call(codegen_ast.left) and + not codegen_ast.left.kwargs): + codegen_ast.left = codegen_ast.left.args[0] + changes.append(True) + # NUMBER(y) == x -> y == x + if (isinstance(codegen_ast, codegen.Equals) and + is_NUMBER_function_call(codegen_ast.right) and + not codegen_ast.right.kwargs): + codegen_ast.right = codegen_ast.right.args[0] + changes.append(True) + + # FluentNone('x').format(locale) -> 'x' + if (isinstance(codegen_ast, codegen.MethodCall) and + is_fluent_none(codegen_ast.obj) and + codegen_ast.method_name == 'format' and + isinstance(codegen_ast.args[0], codegen.VariableReference) and + codegen_ast.args[0].name == LOCALE_NAME): + make_fluent_none_call = codegen_ast.obj + + # We can make the FluentNone object now, call its format method + if len(make_fluent_none_call.args) == 0: + none_object = FluentNone() + elif isinstance(make_fluent_none_call.args[0], codegen.String): + none_object = FluentNone(make_fluent_none_call.args[0].string_value) + else: + none_object = None + + if none_object is not None: + changes.append(True) + return codegen.String(none_object.format(self.compiler_env.locale)) + + return codegen_ast diff --git a/fluent.runtime/fluent/runtime/errors.py b/fluent.runtime/fluent/runtime/errors.py index 5c25da4..7ff89e4 100644 --- a/fluent.runtime/fluent/runtime/errors.py +++ b/fluent.runtime/fluent/runtime/errors.py @@ -1,15 +1,32 @@ from __future__ import absolute_import, unicode_literals -class FluentFormatError(ValueError): +class FluentError(ValueError): + # This equality method exists to make exact tests for exceptions much + # simpler to write, at least for our own errors. 
def __eq__(self, other): return ((other.__class__ == self.__class__) and other.args == self.args) +class FluentFormatError(FluentError): + pass + + class FluentReferenceError(FluentFormatError): pass class FluentCyclicReferenceError(FluentFormatError): pass + + +class FluentDuplicateMessageId(FluentError): + pass + + +class FluentJunkFound(FluentError): + def __init__(self, *args): + super(FluentJunkFound, self).__init__(*args) + self.message = args[0] + self.annotations = args[1] diff --git a/fluent.runtime/fluent/runtime/resolver.py b/fluent.runtime/fluent/runtime/resolver.py index 66c10da..382f1b7 100644 --- a/fluent.runtime/fluent/runtime/resolver.py +++ b/fluent.runtime/fluent/runtime/resolver.py @@ -196,7 +196,7 @@ def __call__(self, env): def lookup_reference(ref, env): """ - Given a MessageReference, TermReference or AttributeExpression, returns the + Given a MessageReference or TermReference, returns the AST node, or FluentNone if not found, including fallback logic """ ref_id = reference_to_id(ref) diff --git a/fluent.runtime/fluent/runtime/runtime.py b/fluent.runtime/fluent/runtime/runtime.py new file mode 100644 index 0000000..a9be49d --- /dev/null +++ b/fluent.runtime/fluent/runtime/runtime.py @@ -0,0 +1,48 @@ +# Runtime functions for compiled messages + +from datetime import date, datetime +from decimal import Decimal + +import six + +from .errors import FluentCyclicReferenceError, FluentFormatError, FluentReferenceError +from .types import FluentNone, FluentType, fluent_date, fluent_number + +__all__ = ['handle_argument', 'handle_output', 'FluentCyclicReferenceError', 'FluentReferenceError', + 'FluentFormatError', 'FluentNone'] + + +text_type = six.text_type + +RETURN_TYPES = { + 'handle_argument': object, + 'handle_output': text_type, + 'FluentReferenceError': FluentReferenceError, + 'FluentFormatError': FluentFormatError, + 'FluentNone': FluentNone, +} + + +def handle_argument(arg, name, locale, errors): + # This needs to be synced with 
resolver.handle_variable_reference + if isinstance(arg, text_type): + return arg + elif isinstance(arg, (int, float, Decimal)): + return fluent_number(arg) + elif isinstance(arg, (date, datetime)): + return fluent_date(arg) + errors.append(TypeError("Unsupported external type: {0}, {1}" + .format(name, type(arg)))) + return name + + +def handle_output(val, locale, errors): + if isinstance(val, text_type): + return val + elif isinstance(val, FluentType): + return val.format(locale) + else: + # The only way for this branch to run is when functions return + # objects of the wrong type. + raise TypeError("Cannot handle object {0} of type {1}" + .format(val, type(val).__name__)) diff --git a/fluent.runtime/fluent/runtime/types.py b/fluent.runtime/fluent/runtime/types.py index 786ebfe..5f6704f 100644 --- a/fluent.runtime/fluent/runtime/types.py +++ b/fluent.runtime/fluent/runtime/types.py @@ -60,6 +60,9 @@ def __eq__(self, other): def format(self, locale): return self.name or "???" + def __repr__(self): + return ''.format(self.name) + @attr.s class NumberFormatOptions(object): diff --git a/fluent.runtime/fluent/runtime/utils.py b/fluent.runtime/fluent/runtime/utils.py index 2578893..f02a42b 100644 --- a/fluent.runtime/fluent/runtime/utils.py +++ b/fluent.runtime/fluent/runtime/utils.py @@ -7,6 +7,7 @@ from datetime import date, datetime from decimal import Decimal +import six from fluent.syntax.ast import Term, TermReference from .errors import FluentFormatError, FluentReferenceError @@ -45,6 +46,13 @@ def ast_to_id(ast): return ast.id.name +def attribute_ast_to_id(attribute, parent_ast): + """ + Returns a string reference for an Attribute, given Attribute and parent Term or Message + """ + return ''.join([ast_to_id(parent_ast), ATTRIBUTE_SEPARATOR, attribute.id.name]) + + if sys.version_info < (3,): # Python 3 has builtin str.isidentifier method, for Python 2 we refer to # https://docs.python.org/2/reference/lexical_analysis.html#identifiers diff --git 
a/fluent.runtime/runtests.py b/fluent.runtime/runtests.py index 191991a..1badb43 100755 --- a/fluent.runtime/runtests.py +++ b/fluent.runtime/runtests.py @@ -10,6 +10,7 @@ description="Run the test suite, or some tests") parser.add_argument('--coverage', "-c", action='store_true', help="Run with 'coverage'") +parser.add_argument('--verbose', '-v', action='store_true') parser.add_argument('test', type=str, nargs="*", help="Dotted path to a test module, case or method") @@ -22,6 +23,9 @@ else: cmd.extend(["discover", "-t", ".", "-s", "tests"]) +if args.verbose: + cmd.append("-v") + if args.coverage: cmd = ["-m", "coverage", "run"] + cmd diff --git a/fluent.runtime/setup.py b/fluent.runtime/setup.py index 21324f1..615edc1 100755 --- a/fluent.runtime/setup.py +++ b/fluent.runtime/setup.py @@ -1,6 +1,14 @@ #!/usr/bin/env python from setuptools import setup +import sys +if sys.version_info < (3, 4): + old_python_requires = ['singledispatch>=3.4'] +else: + # functools.singledispatch is in stdlib from Python 3.4 onwards. 
+ old_python_requires = [] + +tests_requires = ['ast_decompiler>=0.3.2', 'hypothesis>=4.9.0'] setup(name='fluent.runtime', version='0.1', @@ -26,6 +34,10 @@ 'babel', 'pytz', 'six', - ], + ] + old_python_requires, test_suite='tests', + tests_require=tests_requires, # for 'setup.py test' + extras_require={ + 'develop': tests_requires, # for 'pip install fluent.runtime[develop]' + }, ) diff --git a/fluent.runtime/tests/__init__.py b/fluent.runtime/tests/__init__.py index e69de29..1e8469a 100644 --- a/fluent.runtime/tests/__init__.py +++ b/fluent.runtime/tests/__init__.py @@ -0,0 +1,34 @@ +from __future__ import absolute_import, unicode_literals + +from fluent.runtime import CompilingFluentBundle, InterpretingFluentBundle + + +fluent_bundle_implementations = [ + (InterpretingFluentBundle, "_Interpreter"), + (CompilingFluentBundle, "_Compiler") +] + + +def all_fluent_bundle_implementations(test_cls): + """ + Modifies a TestCase subclass to run all test methods + against all implementations of FluentBundle + """ + # Replace 'test_' methods with multiple versions, one for each + # implementation. + for attr_key, attr_value in list(test_cls.__dict__.items()): + if attr_key.startswith('test_') and callable(attr_value): + delattr(test_cls, attr_key) + for cls, suffix in fluent_bundle_implementations: + new_attr_key = attr_key + suffix + setattr(test_cls, new_attr_key, attr_value) + + # Add an '__init__' that selects the right implementation. 
+ def __init__(self, methodName='runTest'): + for cls, suffix in fluent_bundle_implementations: + if methodName.endswith(suffix): + self.fluent_bundle_cls = cls + super(test_cls, self).__init__(methodName=methodName) + + test_cls.__init__ = __init__ + return test_cls diff --git a/fluent.runtime/tests/format/test_arguments.py b/fluent.runtime/tests/format/test_arguments.py index 72b9e6f..0ed3d04 100644 --- a/fluent.runtime/tests/format/test_arguments.py +++ b/fluent.runtime/tests/format/test_arguments.py @@ -2,14 +2,16 @@ import unittest -from fluent.runtime import FluentBundle +from fluent.runtime.errors import FluentReferenceError +from .. import all_fluent_bundle_implementations from ..utils import dedent_ftl +@all_fluent_bundle_implementations class TestNumbersInValues(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" foo = Foo { $num } bar = { foo } @@ -41,9 +43,10 @@ def test_can_be_used_in_a_variant(self): self.assertEqual(len(errs), 0) +@all_fluent_bundle_implementations class TestStrings(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" foo = { $arg } """)) @@ -52,3 +55,22 @@ def test_can_be_a_string(self): val, errs = self.ctx.format('foo', {'arg': 'Argument'}) self.assertEqual(val, 'Argument') self.assertEqual(len(errs), 0) + + +@all_fluent_bundle_implementations +class TestMissing(unittest.TestCase): + def setUp(self): + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) + self.ctx.add_messages(dedent_ftl(""" + foo = { $arg } + """)) + + def test_missing_with_empty_args_dict(self): + val, errs = self.ctx.format('foo', {}) + self.assertEqual(val, 'arg') + self.assertEqual(errs, [FluentReferenceError('Unknown external: arg')]) + + def 
test_missing_with_no_args_dict(self): + val, errs = self.ctx.format('foo') + self.assertEqual(val, 'arg') + self.assertEqual(errs, [FluentReferenceError('Unknown external: arg')]) diff --git a/fluent.runtime/tests/format/test_attributes.py b/fluent.runtime/tests/format/test_attributes.py index e2eb4e8..6d486d2 100644 --- a/fluent.runtime/tests/format/test_attributes.py +++ b/fluent.runtime/tests/format/test_attributes.py @@ -2,16 +2,17 @@ import unittest -from fluent.runtime import FluentBundle from fluent.runtime.errors import FluentReferenceError +from .. import all_fluent_bundle_implementations from ..utils import dedent_ftl +@all_fluent_bundle_implementations class TestAttributesWithStringValues(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" foo = Foo .attr = Foo Attribute @@ -42,10 +43,11 @@ def test_can_be_formatted_directly_for_entities_with_pattern_values(self): self.assertEqual(len(errs), 0) +@all_fluent_bundle_implementations class TestAttributesWithSimplePatternValues(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" foo = Foo bar = Bar @@ -90,9 +92,10 @@ def test_works_with_self_references_direct(self): self.assertEqual(len(errs), 0) +@all_fluent_bundle_implementations class TestMissing(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" foo = Foo bar = Bar diff --git a/fluent.runtime/tests/format/test_builtins.py b/fluent.runtime/tests/format/test_builtins.py index 635196f..b56b434 100644 --- a/fluent.runtime/tests/format/test_builtins.py +++ b/fluent.runtime/tests/format/test_builtins.py @@ -4,17 +4,18 @@ 
from datetime import date, datetime from decimal import Decimal -from fluent.runtime import FluentBundle from fluent.runtime.errors import FluentReferenceError from fluent.runtime.types import fluent_date, fluent_number +from .. import all_fluent_bundle_implementations from ..utils import dedent_ftl +@all_fluent_bundle_implementations class TestNumberBuiltin(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" implicit-call = { 123456 } implicit-call2 = { $arg } @@ -110,10 +111,11 @@ def test_bad_arity(self): self.assertEqual(type(errs[0]), TypeError) +@all_fluent_bundle_implementations class TestDatetimeBuiltin(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" implicit-call = { $date } explicit-call = { DATETIME($date) } diff --git a/fluent.runtime/tests/format/test_functions.py b/fluent.runtime/tests/format/test_functions.py index 8377e7b..f260b77 100644 --- a/fluent.runtime/tests/format/test_functions.py +++ b/fluent.runtime/tests/format/test_functions.py @@ -4,13 +4,14 @@ import six -from fluent.runtime import FluentBundle from fluent.runtime.errors import FluentReferenceError from fluent.runtime.types import FluentNone, fluent_number +from .. 
import all_fluent_bundle_implementations from ..utils import dedent_ftl +@all_fluent_bundle_implementations class TestFunctionCalls(unittest.TestCase): def setUp(self): @@ -40,7 +41,7 @@ class Unsupported(object): RESTRICTED.ftl_arg_spec = (0, ['allowed']) - self.ctx = FluentBundle( + self.ctx = self.fluent_bundle_cls( ['en-US'], use_isolating=False, functions={'IDENTITY': IDENTITY, 'WITH_KEYWORD': WITH_KEYWORD, @@ -158,7 +159,6 @@ def test_bad_output(self): self.ctx.format('bad-output') self.assertIn("Unsupported", cm.exception.args[0]) - @unittest.expectedFailure def test_bad_output_2(self): # This is a developer error, so should raise an exception with self.assertRaises(TypeError) as cm: @@ -171,10 +171,11 @@ def test_non_identifier_python_keyword_args(self): self.assertEqual(len(errs), 0) +@all_fluent_bundle_implementations class TestMissing(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" missing = { MISSING(1) } """)) @@ -186,6 +187,7 @@ def test_falls_back_to_name_of_function(self): [FluentReferenceError("Unknown function: MISSING")]) +@all_fluent_bundle_implementations class TestResolving(unittest.TestCase): def setUp(self): @@ -195,7 +197,7 @@ def number_processor(number): self.args_passed.append(number) return number - self.ctx = FluentBundle( + self.ctx = self.fluent_bundle_cls( ['en-US'], use_isolating=False, functions={'NUMBER_PROCESSOR': number_processor}) @@ -220,6 +222,7 @@ def test_literals_passed_as_numbers(self): self.assertEqual(self.args_passed, [fluent_number(1)]) +@all_fluent_bundle_implementations class TestKeywordArgs(unittest.TestCase): def setUp(self): @@ -229,7 +232,7 @@ def my_function(arg, kwarg1=None, kwarg2="default"): self.args_passed.append((arg, kwarg1, kwarg2)) return arg - self.ctx = FluentBundle( + self.ctx = self.fluent_bundle_cls( ['en-US'], use_isolating=False, 
functions={'MYFUNC': my_function}) self.ctx.add_messages(dedent_ftl(""" diff --git a/fluent.runtime/tests/format/test_isolating.py b/fluent.runtime/tests/format/test_isolating.py index e14c1ec..9746b25 100644 --- a/fluent.runtime/tests/format/test_isolating.py +++ b/fluent.runtime/tests/format/test_isolating.py @@ -2,8 +2,7 @@ import unittest -from fluent.runtime import FluentBundle - +from .. import all_fluent_bundle_implementations from ..utils import dedent_ftl # Unicode bidi isolation characters. @@ -11,10 +10,11 @@ PDI = '\u2069' +@all_fluent_bundle_implementations class TestUseIsolating(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US']) + self.ctx = self.fluent_bundle_cls(['en-US']) self.ctx.add_messages(dedent_ftl(""" foo = Foo bar = { foo } Bar @@ -45,10 +45,11 @@ def test_isolates_complex_interpolations(self): self.assertEqual(len(errs), 0) +@all_fluent_bundle_implementations class TestSkipIsolating(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US']) + self.ctx = self.fluent_bundle_cls(['en-US']) self.ctx.add_messages(dedent_ftl(""" -brand-short-name = Amaya foo = { -brand-short-name } diff --git a/fluent.runtime/tests/format/test_parameterized_terms.py b/fluent.runtime/tests/format/test_parameterized_terms.py index 3ff3178..d4974f7 100644 --- a/fluent.runtime/tests/format/test_parameterized_terms.py +++ b/fluent.runtime/tests/format/test_parameterized_terms.py @@ -2,16 +2,18 @@ import unittest -from fluent.runtime import FluentBundle from fluent.runtime.errors import FluentFormatError, FluentReferenceError +from .. 
import all_fluent_bundle_implementations from ..utils import dedent_ftl +@all_fluent_bundle_implementations class TestParameterizedTerms(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) + self.ctx.add_messages(dedent_ftl(""" -thing = { $article -> *[definite] the thing @@ -68,10 +70,48 @@ def test_bad_term(self): self.assertEqual(errs, [FluentReferenceError('Unknown term: -missing')]) +@all_fluent_bundle_implementations +class TestParameterizedTermsWithNumbers(unittest.TestCase): + + def setUp(self): + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) + self.ctx.add_messages(dedent_ftl(""" + -thing = { $count -> + *[1] one thing + [2] two things + } + thing-no-arg = { -thing } + thing-no-arg-alt = { -thing() } + thing-one = { -thing(count: 1) } + thing-two = { -thing(count: 2) } + """)) + + def test_argument_omitted(self): + val, errs = self.ctx.format('thing-no-arg', {}) + self.assertEqual(val, 'one thing') + self.assertEqual(errs, []) + + def test_argument_omitted_2(self): + val, errs = self.ctx.format('thing-no-arg-alt', {}) + self.assertEqual(val, 'one thing') + self.assertEqual(errs, []) + + def test_thing_one(self): + val, errs = self.ctx.format('thing-one', {}) + self.assertEqual(val, 'one thing') + self.assertEqual(errs, []) + + def test_thing_two(self): + val, errs = self.ctx.format('thing-two', {}) + self.assertEqual(val, 'two things') + self.assertEqual(errs, []) + + +@all_fluent_bundle_implementations class TestParameterizedTermAttributes(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" -brand = Cool Thing .status = { $version -> @@ -107,10 +147,11 @@ def test_missing_attr(self): self.assertEqual(errs, [FluentReferenceError('Unknown attribute: -other.missing')]) 
+@all_fluent_bundle_implementations class TestNestedParameterizedTerms(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" -thing = { $article -> *[definite] { $first-letter -> @@ -155,10 +196,50 @@ def test_neither_arg(self): self.assertEqual(errs, []) +@all_fluent_bundle_implementations +class TestTermsWithTermReferences(unittest.TestCase): + + def setUp(self): + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) + self.ctx.add_messages(dedent_ftl(""" + -thing = { $article -> + *[definite] the { -other } + [indefinite] a { -other } + } + + -other = thing + + thing-with-arg = { -thing(article: "indefinite") } + thing-fallback = { -thing(article: "somethingelse") } + + -bad-term = { $article -> + *[all] Something wrong { -missing } + } + + uses-bad-term = { -bad-term } + """)) + + def test_with_argument(self): + val, errs = self.ctx.format('thing-with-arg', {}) + self.assertEqual(val, 'a thing') + self.assertEqual(errs, []) + + def test_fallback(self): + val, errs = self.ctx.format('thing-fallback', {}) + self.assertEqual(val, 'the thing') + self.assertEqual(errs, []) + + def test_term_with_missing_term_reference(self): + val, errs = self.ctx.format('uses-bad-term', {}) + self.assertEqual(val, 'Something wrong -missing') + self.assertEqual(errs, [FluentReferenceError('Unknown term: -missing',)]) + + +@all_fluent_bundle_implementations class TestTermsCalledFromTerms(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" -foo = {$a} {$b} -bar = {-foo(b: 2)} @@ -178,10 +259,11 @@ def test_term_args_isolated_without_call_syntax(self): self.assertEqual(errs, []) +@all_fluent_bundle_implementations class TestMessagesCalledFromTerms(unittest.TestCase): def setUp(self): 
- self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" msg = Msg is {$arg} -foo = {msg} diff --git a/fluent.runtime/tests/format/test_placeables.py b/fluent.runtime/tests/format/test_placeables.py index 963b386..84e34ab 100644 --- a/fluent.runtime/tests/format/test_placeables.py +++ b/fluent.runtime/tests/format/test_placeables.py @@ -2,15 +2,16 @@ import unittest -from fluent.runtime import FluentBundle from fluent.runtime.errors import FluentCyclicReferenceError, FluentReferenceError +from .. import all_fluent_bundle_implementations from ..utils import dedent_ftl +@all_fluent_bundle_implementations class TestPlaceables(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" message = Message .attr = Message Attribute @@ -39,6 +40,8 @@ def setUp(self): .attr = Attribute self-parent-ref-ok = Parent .attr = Attribute { self-parent-ref-ok } + -cyclic-term = { -cyclic-term } + cyclic-term-message = { -cyclic-term } """)) def test_placeable_message(self): @@ -82,19 +85,21 @@ def test_placeable_bad_term(self): def test_cycle_detection(self): val, errs = self.ctx.format('self-referencing-message', {}) - self.assertEqual(val, 'Text ???') + self.assertIn('???', val) self.assertEqual(len(errs), 1) - self.assertEqual( - errs, - [FluentCyclicReferenceError("Cyclic reference")]) + self.assertEqual(type(errs[0]), FluentCyclicReferenceError) def test_mutual_cycle_detection(self): val, errs = self.ctx.format('cyclic-msg1', {}) - self.assertEqual(val, 'Text1 Text2 ???') + self.assertIn('???', val) self.assertEqual(len(errs), 1) - self.assertEqual( - errs, - [FluentCyclicReferenceError("Cyclic reference")]) + self.assertEqual(type(errs[0]), FluentCyclicReferenceError) + + def test_term_cycle_detection(self): + val, errs = 
self.ctx.format('cyclic-term-message', {}) + self.assertIn('???', val) + self.assertEqual(len(errs), 1) + self.assertEqual(type(errs[0]), FluentCyclicReferenceError) def test_allowed_self_reference(self): val, errs = self.ctx.format('self-attribute-ref-ok', {}) @@ -105,44 +110,45 @@ def test_allowed_self_reference(self): self.assertEqual(len(errs), 0) +@all_fluent_bundle_implementations class TestSingleElementPattern(unittest.TestCase): def test_single_literal_number_isolating(self): - self.ctx = FluentBundle(['en-US'], use_isolating=True) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=True) self.ctx.add_messages('foo = { 1 }') val, errs = self.ctx.format('foo') self.assertEqual(val, '1') self.assertEqual(errs, []) def test_single_literal_number_non_isolating(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages('foo = { 1 }') val, errs = self.ctx.format('foo') self.assertEqual(val, '1') self.assertEqual(errs, []) def test_single_arg_number_isolating(self): - self.ctx = FluentBundle(['en-US'], use_isolating=True) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=True) self.ctx.add_messages('foo = { $arg }') val, errs = self.ctx.format('foo', {'arg': 1}) self.assertEqual(val, '1') self.assertEqual(errs, []) def test_single_arg_number_non_isolating(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages('foo = { $arg }') val, errs = self.ctx.format('foo', {'arg': 1}) self.assertEqual(val, '1') self.assertEqual(errs, []) def test_single_arg_missing_isolating(self): - self.ctx = FluentBundle(['en-US'], use_isolating=True) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=True) self.ctx.add_messages('foo = { $arg }') val, errs = self.ctx.format('foo') self.assertEqual(val, 'arg') self.assertEqual(len(errs), 1) def 
test_single_arg_missing_non_isolating(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages('foo = { $arg }') val, errs = self.ctx.format('foo') self.assertEqual(val, 'arg') diff --git a/fluent.runtime/tests/format/test_primitives.py b/fluent.runtime/tests/format/test_primitives.py index bdc1f66..93aab23 100644 --- a/fluent.runtime/tests/format/test_primitives.py +++ b/fluent.runtime/tests/format/test_primitives.py @@ -3,14 +3,16 @@ import unittest -from fluent.runtime import FluentBundle +import six +from .. import all_fluent_bundle_implementations from ..utils import dedent_ftl +@all_fluent_bundle_implementations class TestSimpleStringValue(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(r""" foo = Foo placeable-literal = { "Foo" } Bar @@ -34,6 +36,7 @@ def setUp(self): def test_can_be_used_as_a_value(self): val, errs = self.ctx.format('foo', {}) self.assertEqual(val, 'Foo') + self.assertEqual(type(val), six.text_type) self.assertEqual(len(errs), 0) def test_can_be_used_in_a_placeable(self): @@ -72,9 +75,10 @@ def test_escapes(self): self.assertEqual(len(errs), 0) +@all_fluent_bundle_implementations class TestComplexStringValue(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" foo = Foo bar = { foo }Bar @@ -121,9 +125,10 @@ def test_can_be_a_value_of_an_attribute_used_as_a_selector(self): self.assertEqual(len(errs), 0) +@all_fluent_bundle_implementations class TestNumbers(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" one 
= { 1 } one_point_two = { 1.2 } diff --git a/fluent.runtime/tests/format/test_select_expression.py b/fluent.runtime/tests/format/test_select_expression.py index 8a1b77d..3ccb17f 100644 --- a/fluent.runtime/tests/format/test_select_expression.py +++ b/fluent.runtime/tests/format/test_select_expression.py @@ -2,16 +2,17 @@ import unittest -from fluent.runtime import FluentBundle from fluent.runtime.errors import FluentReferenceError +from .. import all_fluent_bundle_implementations from ..utils import dedent_ftl +@all_fluent_bundle_implementations class TestSelectExpressionWithStrings(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) def test_with_a_matching_selector(self): self.ctx.add_messages(dedent_ftl(""" @@ -57,11 +58,35 @@ def test_with_argument_expression(self): val, errs = self.ctx.format('foo', {'arg': 'a'}) self.assertEqual(val, "A") + def test_string_selector_with_plural_categories(self): + self.ctx.add_messages(dedent_ftl(""" + foo = { $arg -> + [something] A + *[other] B + } + """)) + # Even though 'other' matches a CLDR plural, this is not a plural + # category match, and should work without errors when we pass + # a string. 
+ + val, errs = self.ctx.format('foo', {'arg': 'something'}) + self.assertEqual(val, "A") + self.assertEqual(errs, []) + val2, errs2 = self.ctx.format('foo', {'arg': 'other'}) + self.assertEqual(val2, "B") + self.assertEqual(errs2, []) + + val3, errs3 = self.ctx.format('foo', {'arg': 'not listed'}) + self.assertEqual(val3, "B") + self.assertEqual(errs3, []) + + +@all_fluent_bundle_implementations class TestSelectExpressionWithNumbers(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" foo = { 1 -> *[0] A @@ -113,72 +138,158 @@ def test_with_float(self): self.assertEqual(val, "B") +@all_fluent_bundle_implementations +class TestSelectExpressionWithPlaceables(unittest.TestCase): + + def setUp(self): + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) + + def test_external_arguments_in_variants(self): + # We are testing several things: + # - that [b] variant doesn't trigger 'Unknown external: arg' + # - some logic in compiler implementation regarding when variables are looked up, + # so that [a] and [c] variants both can find 'arg'. 
+ self.ctx.add_messages(dedent_ftl(""" + foo = { $lookup -> + [a] { $arg } + [b] B + *[c] { $arg } + } + """)) + # No args: + val1, errs1 = self.ctx.format('foo', {}) + self.assertEqual(val1, "arg") + self.assertEqual(errs1, + [ + FluentReferenceError("Unknown external: lookup"), + FluentReferenceError("Unknown external: arg"), + ]) + + # [a] branch, arg supplied + val2, errs2 = self.ctx.format('foo', {'lookup': 'a', 'arg': 'A'}) + self.assertEqual(val2, "A") + self.assertEqual(errs2, []) + + # [a] branch, arg not supplied + val3, errs3 = self.ctx.format('foo', {'lookup': 'a'}) + self.assertEqual(val3, "arg") + self.assertEqual(errs3, [FluentReferenceError("Unknown external: arg")]) + + # [b] branch + val4, errs4 = self.ctx.format('foo', {'lookup': 'b'}) + self.assertEqual(val4, "B") + self.assertEqual(errs4, []) + + # [c] branch, arg supplied + val5, errs5 = self.ctx.format('foo', {'lookup': 'c', 'arg': 'C'}) + self.assertEqual(val5, "C") + self.assertEqual(errs5, []) + + # [c] branch, arg not supplied + val6, errs6 = self.ctx.format('foo', {'lookup': 'c'}) + self.assertEqual(val6, "arg") + self.assertEqual(errs6, [FluentReferenceError("Unknown external: arg")]) + + +@all_fluent_bundle_implementations class TestSelectExpressionWithPluralCategories(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" foo = { 1 -> [one] A *[other] B } + foo-arg = { $count -> + [one] A + *[other] B + } + bar = { 1 -> [1] A *[other] B } + bar-arg = { $count -> + [1] A + *[other] B + } + baz = { "not a number" -> [one] A *[other] B } - qux = { $num -> + baz-arg = { $count -> [one] A *[other] B } + + qux = { 1.0 -> + [1] A + *[other] B + } + """)) - def test_selects_the_right_category(self): + def test_selects_the_right_category_with_integer_static(self): val, errs = self.ctx.format('foo', {}) self.assertEqual(val, "A") 
self.assertEqual(len(errs), 0) - def test_selects_exact_match(self): - val, errs = self.ctx.format('bar', {}) + def test_selects_the_right_category_with_integer_runtime(self): + val, errs = self.ctx.format('foo-arg', {'count': 1}) self.assertEqual(val, "A") - self.assertEqual(len(errs), 0) + self.assertEqual(errs, []) - def test_selects_default_with_invalid_selector(self): - val, errs = self.ctx.format('baz', {}) + val, errs = self.ctx.format('foo-arg', {'count': 2}) self.assertEqual(val, "B") - self.assertEqual(len(errs), 0) + self.assertEqual(errs, []) - def test_with_a_missing_selector(self): + def test_selects_the_right_category_with_float_static(self): val, errs = self.ctx.format('qux', {}) - self.assertEqual(val, "B") - self.assertEqual(errs, - [FluentReferenceError("Unknown external: num")]) + self.assertEqual(val, "A") + self.assertEqual(len(errs), 0) - def test_with_argument_integer(self): - val, errs = self.ctx.format('qux', {'num': 1}) + def test_selects_the_right_category_with_float_runtime(self): + val, errs = self.ctx.format('foo-arg', {'count': 1.0}) self.assertEqual(val, "A") self.assertEqual(len(errs), 0) - val, errs = self.ctx.format('qux', {'num': 2}) - self.assertEqual(val, "B") + def test_selects_exact_match_static(self): + val, errs = self.ctx.format('bar', {}) + self.assertEqual(val, "A") self.assertEqual(len(errs), 0) - def test_with_argument_float(self): - val, errs = self.ctx.format('qux', {'num': 1.0}) + def test_selects_exact_match_runtime(self): + val, errs = self.ctx.format('bar-arg', {'count': 1}) self.assertEqual(val, "A") self.assertEqual(len(errs), 0) + def test_selects_default_with_invalid_selector_static(self): + val, errs = self.ctx.format('baz', {}) + self.assertEqual(val, "B") + self.assertEqual(len(errs), 0) + + def test_selects_default_with_invalid_selector_runtime(self): + val, errs = self.ctx.format('baz-arg', {'count': 'not a number'}) + self.assertEqual(val, "B") + self.assertEqual(len(errs), 0) + + def 
test_with_a_missing_selector(self): + val, errs = self.ctx.format('foo-arg', {}) + self.assertEqual(val, "B") + self.assertEqual(errs, + [FluentReferenceError("Unknown external: count")]) + +@all_fluent_bundle_implementations class TestSelectExpressionWithTerms(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" -my-term = term .attr = termattribute diff --git a/fluent.runtime/tests/test_bomb.py b/fluent.runtime/tests/test_bomb.py index 082de96..3906cb0 100644 --- a/fluent.runtime/tests/test_bomb.py +++ b/fluent.runtime/tests/test_bomb.py @@ -2,15 +2,20 @@ import unittest -from fluent.runtime import FluentBundle +from fluent.runtime import InterpretingFluentBundle from .utils import dedent_ftl +# Only InterpretingFluentBundle has protection, for compiler it is too much of +# a (relative) performance hit and an increase in complexity to track this issue. + class TestBillionLaughs(unittest.TestCase): + fluent_bundle_cls = InterpretingFluentBundle + def setUp(self): - self.ctx = FluentBundle(['en-US'], use_isolating=False) + self.ctx = self.fluent_bundle_cls(['en-US'], use_isolating=False) self.ctx.add_messages(dedent_ftl(""" lol0 = 01234567890123456789012345678901234567890123456789 lol1 = {lol0}{lol0}{lol0}{lol0}{lol0}{lol0}{lol0}{lol0}{lol0}{lol0} diff --git a/fluent.runtime/tests/test_bundle.py b/fluent.runtime/tests/test_bundle.py index 63710e8..1a18523 100644 --- a/fluent.runtime/tests/test_bundle.py +++ b/fluent.runtime/tests/test_bundle.py @@ -3,14 +3,16 @@ import unittest -from fluent.runtime import FluentBundle +from fluent.runtime.errors import FluentDuplicateMessageId, FluentJunkFound, FluentReferenceError +from . 
import all_fluent_bundle_implementations from .utils import dedent_ftl +@all_fluent_bundle_implementations class TestFluentBundle(unittest.TestCase): def setUp(self): - self.ctx = FluentBundle(['en-US']) + self.ctx = self.fluent_bundle_cls(['en-US']) def test_add_messages(self): self.ctx.add_messages(dedent_ftl(""" @@ -25,10 +27,12 @@ def test_add_messages(self): def test_has_message(self): self.ctx.add_messages(dedent_ftl(""" foo = Foo + -term = Term """)) self.assertTrue(self.ctx.has_message('foo')) self.assertFalse(self.ctx.has_message('bar')) + self.assertFalse(self.ctx.has_message('-term')) def test_has_message_for_term(self): self.ctx.add_messages(dedent_ftl(""" @@ -48,7 +52,7 @@ def test_has_message_with_attribute(self): self.assertFalse(self.ctx.has_message('foo.other-attribute')) def test_plural_form_english_ints(self): - ctx = FluentBundle(['en-US']) + ctx = self.fluent_bundle_cls(['en-US']) self.assertEqual(ctx._plural_form(0), 'other') self.assertEqual(ctx._plural_form(1), @@ -57,7 +61,7 @@ def test_plural_form_english_ints(self): 'other') def test_plural_form_english_floats(self): - ctx = FluentBundle(['en-US']) + ctx = self.fluent_bundle_cls(['en-US']) self.assertEqual(ctx._plural_form(0.0), 'other') self.assertEqual(ctx._plural_form(1.0), @@ -70,7 +74,7 @@ def test_plural_form_english_floats(self): def test_plural_form_french(self): # Just spot check one other, to ensure that we # are not getting the EN locale by accident or - ctx = FluentBundle(['fr']) + ctx = self.fluent_bundle_cls(['fr']) self.assertEqual(ctx._plural_form(0), 'one') self.assertEqual(ctx._plural_form(1), @@ -110,3 +114,35 @@ def test_message_and_term_separate(self): val, errs = self.ctx.format('foo', {}) self.assertEqual(val, 'Refers to \u2068Foo\u2069') self.assertEqual(errs, []) + + def test_check_messages_duplicate(self): + self.ctx.add_messages("foo = Foo\n" + "foo = Bar\n") + checks = self.ctx.check_messages() + self.assertEqual(checks, + [('foo', 
FluentDuplicateMessageId("Additional definition for 'foo' discarded."))]) + # Earlier takes precedence + self.assertEqual(self.ctx.format('foo')[0], 'Foo') + + def test_check_messages_junk(self): + self.ctx.add_messages("unfinished") + checks = self.ctx.check_messages() + self.assertEqual(len(checks), 1) + check1_name, check1_error = checks[0] + self.assertEqual(check1_name, None) + self.assertEqual(type(check1_error), FluentJunkFound) + self.assertEqual(check1_error.message, 'Junk found: Expected token: "="') + self.assertEqual(check1_error.annotations[0].message, 'Expected token: "="') + + def test_check_messages_compile_errors(self): + self.ctx.add_messages("foo = { -missing }") + checks = self.ctx.check_messages() + if self.ctx.__class__.__name__ == "CompilingFluentBundle": + # CompilingFluentBundle is able to do more static checks. + self.assertEqual(len(checks), 1) + check1_name, check1_error = checks[0] + self.assertEqual(check1_name, 'foo') + self.assertEqual(type(check1_error), FluentReferenceError) + self.assertEqual(check1_error.args[0], 'Unknown term: -missing') + else: + self.assertEqual(len(checks), 0) diff --git a/fluent.runtime/tests/test_codegen.py b/fluent.runtime/tests/test_codegen.py new file mode 100644 index 0000000..602855e --- /dev/null +++ b/fluent.runtime/tests/test_codegen.py @@ -0,0 +1,557 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import, unicode_literals + +import ast +import sys +import textwrap +import unittest + +from ast_decompiler import decompiler +from hypothesis import given +from hypothesis.strategies import text + +from fluent.runtime import codegen +from fluent.runtime.utils import allowable_name + + +def normalize_python(txt): + return textwrap.dedent(txt.rstrip()).strip() + + +class TestDecompiler(decompiler.Decompiler): + if sys.version_info < (3, 0): + # We override one bit of behaviour to make Python 2 testing simpler. 
+ + # We don't need 'u' prefixes on strings because we are using from + # __future__ import unicode_literals. Therefore we omit them in the + # decompiled output. This function is copy-pasted from Decompiler. + # with just the unicode branch changed. + + def visit_Str(self, node): + if sys.version_info < (3, 0): + if self.has_unicode_literals and isinstance(node.s, str): + self.write('b') + # REMOVED + # elif isinstance(node.s, unicode): + # self.write('u') + if sys.version_info >= (3, 6) and self.has_parent_of_type(ast.FormattedValue): + delimiter = '"' + else: + delimiter = "'" + self.write(delimiter) + s = node.s.encode('unicode-escape').decode('ascii') + self.write(s.replace(delimiter, '\\' + delimiter)) + self.write(delimiter) + + +def decompile(ast, indentation=4, line_length=100, starting_indentation=0): + """Decompiles an AST into Python code. + """ + decompiler = TestDecompiler( + indentation=indentation, + line_length=line_length, + starting_indentation=starting_indentation, + ) + return decompiler.run(ast) + + +def decompile_ast_list(ast_list): + return decompile(ast.Module(body=ast_list, + **codegen.DEFAULT_AST_ARGS)) + + +def as_source_code(codegen_ast): + if not hasattr(codegen_ast, 'as_ast'): + ast_list = codegen_ast.as_ast_list() + else: + ast_list = [codegen_ast.as_ast()] + return decompile_ast_list(ast_list) + + +class TestCodeGen(unittest.TestCase): + + def assertCodeEqual(self, code1, code2): + self.assertEqual(normalize_python(code1), + normalize_python(code2)) + + def test_reserve_name(self): + scope = codegen.Scope() + name1 = scope.reserve_name('name') + name2 = scope.reserve_name('name') + self.assertEqual(name1, 'name') + self.assertNotEqual(name1, name2) + self.assertEqual(name2, 'name2') + + def test_reserve_name_function_arg_disallowed(self): + scope = codegen.Scope() + scope.reserve_name('name') + self.assertRaises(AssertionError, + scope.reserve_name, + 'name', + function_arg=True) + + def test_reserve_name_function_arg(self): + scope 
= codegen.Scope() + scope.reserve_function_arg_name('arg_name') + scope.reserve_name('myfunc') + func = codegen.Function('myfunc', + args=['arg_name'], + parent_scope=scope) + self.assertNotIn('arg_name2', func.all_reserved_names()) + + def test_reserve_name_nested(self): + parent = codegen.Scope() + parent_name = parent.reserve_name('name') + self.assertEqual(parent_name, 'name') + + child1 = codegen.Scope(parent_scope=parent) + child2 = codegen.Scope(parent_scope=parent) + + child1_name = child1.reserve_name('name') + self.assertNotEqual(child1_name, parent_name) + + child2_name = child2.reserve_name('name') + self.assertNotEqual(child2_name, parent_name) + + # But children can have same names, they don't shadow each other. + # To be deterministic, we expect the same name + self.assertEqual(child1_name, child2_name) + + def test_reserve_name_after_reserve_function_arg(self): + scope = codegen.Scope() + scope.reserve_function_arg_name('my_arg') + name = scope.reserve_name('my_arg') + self.assertEqual(name, 'my_arg2') + + def test_reserve_function_arg_after_reserve_name(self): + scope = codegen.Scope() + scope.reserve_name('my_arg') + self.assertRaises(AssertionError, + scope.reserve_function_arg_name, + 'my_arg') + + def test_name_properties(self): + scope = codegen.Scope() + scope.reserve_name('name', properties={'FOO': True}) + self.assertEqual(scope.get_name_properties('name'), + {'FOO': True}) + + def test_function(self): + module = codegen.Module() + func = codegen.Function('myfunc', args=['myarg1', 'myarg2'], + parent_scope=module.scope) + self.assertCodeEqual(as_source_code(func), """ + def myfunc(myarg1, myarg2): + pass + """) + + def test_function_return(self): + module = codegen.Module() + func = codegen.Function('myfunc', + parent_scope=module) + func.add_return(codegen.String("Hello")) + self.assertCodeEqual(as_source_code(func), """ + def myfunc(): + return 'Hello' + """) + + def test_function_bad_name(self): + module = codegen.Module() + func = 
codegen.Function('my func', args=[], + parent_scope=module) + self.assertRaises(AssertionError, as_source_code, func) + + def test_function_bad_arg(self): + module = codegen.Module() + func = codegen.Function('myfunc', args=['my arg'], + parent_scope=module.scope) + self.assertRaises(AssertionError, as_source_code, func) + + def test_add_function(self): + module = codegen.Module() + func_name = module.scope.reserve_name('myfunc') + func = codegen.Function(func_name, + parent_scope=module) + module.add_function(func_name, func) + self.assertCodeEqual(as_source_code(module), """ + def myfunc(): + pass + """) + + def test_variable_reference(self): + scope = codegen.Scope() + name = scope.reserve_name('name') + ref = codegen.VariableReference(name, scope) + self.assertEqual(as_source_code(ref), 'name') + + def test_variable_reference_check(self): + scope = codegen.Scope() + self.assertRaises(AssertionError, + codegen.VariableReference, + 'name', + scope) + + def test_variable_reference_function_arg_check(self): + scope = codegen.Scope() + func_name = scope.reserve_name('myfunc') + func = codegen.Function(func_name, args=['my_arg'], + parent_scope=scope) + # Can't use undefined 'some_name' + self.assertRaises(AssertionError, + codegen.VariableReference, + 'some_name', + func) + # But can use function argument 'my_arg' + ref = codegen.VariableReference('my_arg', func) + self.assertCodeEqual(as_source_code(ref), 'my_arg') + + def test_variable_reference_bad(self): + module = codegen.Module() + name = module.scope.reserve_name('name') + ref = codegen.VariableReference(name, module.scope) + ref.name = 'bad name' + self.assertRaises(AssertionError, as_source_code, ref) + + def test_scope_variable_helper(self): + # Scope.variable is more convenient than using VariableReference + # manually, we use that from now on. 
+ scope = codegen.Scope() + name = scope.reserve_name('name') + ref1 = codegen.VariableReference(name, scope) + ref2 = scope.variable(name) + self.assertEqual(ref1, ref2) + + def test_function_args_name_check(self): + module = codegen.Module() + module.scope.reserve_name('my_arg') + func_name = module.scope.reserve_name('myfunc') + self.assertRaises(AssertionError, + codegen.Function, + func_name, args=['my_arg'], + parent_scope=module.scope) + + def test_function_args_name_reserved_check(self): + module = codegen.Module() + module.scope.reserve_function_arg_name('my_arg') + func_name = module.scope.reserve_name('myfunc') + func = codegen.Function(func_name, args=['my_arg'], + parent_scope=module.scope) + func.add_return(func.variable('my_arg')) + self.assertCodeEqual(as_source_code(func), """ + def myfunc(my_arg): + return my_arg + """) + + def test_add_assignment_unreserved(self): + scope = codegen.Module() + self.assertRaises(AssertionError, + scope.add_assignment, + 'x', + codegen.String('a string')) + + def test_add_assignment_reserved(self): + module = codegen.Module() + name = module.scope.reserve_name('x') + module.add_assignment(name, codegen.String('a string')) + self.assertCodeEqual(as_source_code(module), """ + x = 'a string' + """) + + def test_add_assignment_bad(self): + module = codegen.Module() + name = module.scope.reserve_name('x') + module.add_assignment(name, codegen.String('a string')) + # We have to modify internals to force the error path, because + # add_assignment already does checking + module.statements[0].name = 'something with a space' + self.assertRaises(AssertionError, + as_source_code, module) + + def test_multiple_add_assignment(self): + # To make our code generation easier to reason about, we disallow + # assigning to same name twice. We can add trimming of unneeded + # temporaries as a later pass. 
+ module = codegen.Module() + name = module.scope.reserve_name('x') + module.add_assignment(name, codegen.String('a string')) + self.assertRaises(AssertionError, + module.add_assignment, + name, codegen.String('another string')) + + def test_multiple_add_assignment_in_inherited_scope(self): + # try/if etc inherit their scope from function + scope = codegen.Scope() + scope.reserve_name('myfunc') + func = codegen.Function('myfunc', + args=[], + parent_scope=scope) + try_ = codegen.Try([], func) + name = func.reserve_name('name') + + # We'd really like to ensure no multiple assignments ever, + # but the way that if/try etc. work make that hard. + # Instead, we add a keyword argument to allow the second assignment. + try_.try_block.add_assignment(name, codegen.Number(1)) + self.assertRaises(AssertionError, + try_.try_block.add_assignment, + name, codegen.Number(2)) + self.assertRaises(AssertionError, + try_.except_block.add_assignment, + name, codegen.Number(2)) + try_.except_block.add_assignment(name, codegen.Number(2), + allow_multiple=True) + + def test_function_call_unknown(self): + scope = codegen.Scope() + self.assertRaises(AssertionError, + codegen.FunctionCall, + 'a_function', + [], + {}, + scope) + + def test_function_call_known(self): + module = codegen.Module() + module.scope.reserve_name('a_function') + func_call = codegen.FunctionCall('a_function', [], {}, module.scope) + self.assertCodeEqual(as_source_code(func_call), "a_function()") + + def test_function_call_args_and_kwargs(self): + module = codegen.Module() + module.scope.reserve_name('a_function') + func_call = codegen.FunctionCall('a_function', [codegen.Number(123)], {'x': codegen.String("hello")}, + module.scope) + self.assertCodeEqual(as_source_code(func_call), "a_function(123, x='hello')") + + def test_function_call_bad_name(self): + module = codegen.Module() + module.scope.reserve_name('a_function') + func_call = codegen.FunctionCall('a_function', [], {}, module.scope) + func_call.function_name = 
'bad function name' + self.assertRaises(AssertionError, as_source_code, func_call) + + def test_function_call_bad_kwarg_names(self): + module = codegen.Module() + module.scope.reserve_name('a_function') + allowed_args = [ + # (name, allowed) pairs. + # We allow reserved names etc. because we can + # call these using **{} syntax + ('hyphen-ated', True), + ('class', True), + ('True', True), + (' pre_space', False), + ('post_space ', False), + ('mid space', False), + ('valid_arg', True), + ] + for arg_name, allowed in allowed_args: + func_call = codegen.FunctionCall('a_function', [], + {arg_name: codegen.String("a")}, + module.scope) + if allowed: + output = as_source_code(func_call) + self.assertNotEqual(output, '') + if not allowable_name(arg_name): + self.assertIn('**{', output) + else: + self.assertRaises(AssertionError, as_source_code, func_call) + + def test_function_call_sensitive(self): + module = codegen.Module() + module.scope.reserve_name('a_function') + func_call = codegen.FunctionCall('a_function', [], {}, module.scope) + # codegen should refuse to create a call to 'exec', there is no reason + # for us to generate code like that. 
+ func_call.function_name = 'exec' + self.assertRaises(AssertionError, as_source_code, func_call) + + def test_method_call_bad_name(self): + scope = codegen.Module() + s = codegen.String("x") + method_call = codegen.MethodCall(s, 'bad method name', [], scope) + self.assertRaises(AssertionError, as_source_code, method_call) + + def test_try_catch(self): + scope = codegen.Scope() + scope.reserve_name('MyError') + try_ = codegen.Try([scope.variable('MyError')], scope) + self.assertCodeEqual(as_source_code(try_), """ + try: + pass + except MyError: + pass + """) + scope.reserve_name('x') + scope.reserve_name('y') + scope.reserve_name('z') + try_.try_block.add_assignment('x', codegen.String("x")) + try_.except_block.add_assignment('y', codegen.String("y")) + try_.else_block.add_assignment('z', codegen.String("z")) + self.assertCodeEqual(as_source_code(try_), """ + try: + x = 'x' + except MyError: + y = 'y' + else: + z = 'z' + """) + + def test_try_catch_multiple_exceptions(self): + scope = codegen.Scope() + scope.reserve_name('MyError') + scope.reserve_name('OtherError') + try_ = codegen.Try([scope.variable('MyError'), + scope.variable('OtherError')], scope) + self.assertCodeEqual(as_source_code(try_), """ + try: + pass + except (MyError, OtherError): + pass + """) + + def test_try_catch_has_assignment_for_name_1(self): + scope = codegen.Scope() + try_ = codegen.Try([], scope) + name = scope.reserve_name('foo') + self.assertFalse(try_.has_assignment_for_name(name)) + + # Just add to 'try' block + try_.try_block.add_assignment(name, codegen.String('x')) + # Not all branches define name, so overall can't trust the name + # to be defined at the end. 
+ self.assertFalse(try_.has_assignment_for_name(name)) + + # Now add to 'except' block as well + try_.except_block.add_assignment(name, codegen.String('x'), allow_multiple=True) + self.assertTrue(try_.has_assignment_for_name(name)) + + def test_try_catch_has_assignment_for_name_2(self): + scope = codegen.Scope() + try_ = codegen.Try([], scope) + name = scope.reserve_name('foo') + + # Add to 'except' + try_.except_block.add_assignment(name, codegen.String('x')) + self.assertFalse(try_.has_assignment_for_name(name)) + + # Add to 'else' + try_.else_block.add_assignment(name, codegen.String('x'), allow_multiple=True) + self.assertTrue(try_.has_assignment_for_name(name)) + + def test_if_empty(self): + scope = codegen.Module() + if_statement = codegen.If(scope) + self.assertCodeEqual(as_source_code(if_statement), "") + + def test_if_one_if(self): + scope = codegen.Module() + if_statement = codegen.If(scope) + first_block = if_statement.add_if(codegen.Number(1)) + first_block.add_return(codegen.Number(2)) + self.assertCodeEqual(as_source_code(if_statement), """ + if 1: + return 2 + """) + + def test_if_two_ifs(self): + scope = codegen.Module() + if_statement = codegen.If(scope) + first_block = if_statement.add_if(codegen.Number(1)) + first_block.add_return(codegen.Number(2)) + second_block = if_statement.add_if(codegen.Number(3)) + second_block.add_return(codegen.Number(4)) + self.assertCodeEqual(as_source_code(if_statement), """ + if 1: + return 2 + elif 3: + return 4 + """) + + def test_if_with_else(self): + scope = codegen.Module() + if_statement = codegen.If(scope) + first_block = if_statement.add_if(codegen.Number(1)) + first_block.add_return(codegen.Number(2)) + if_statement.else_block.add_return(codegen.Number(3)) + self.assertCodeEqual(as_source_code(if_statement), """ + if 1: + return 2 + else: + return 3 + """) + + def test_if_no_ifs(self): + scope = codegen.Module() + if_statement = codegen.If(scope) + if_statement.else_block.add_return(codegen.Number(3)) + 
if_statement = codegen.simplify(if_statement) + self.assertCodeEqual(as_source_code(if_statement), """ + return 3 + """) + + @given(text()) + def test_string(self, t): + self.assertEqual(t, eval(as_source_code(codegen.String(t))), " for t = {!r}".format(t)) + + def test_string_join_empty(self): + join = codegen.StringJoin([]) + join = codegen.simplify(join) + self.assertCodeEqual(as_source_code(join), "''") + + def test_string_join_one(self): + join = codegen.StringJoin([codegen.String('hello')]) + join = codegen.simplify(join) + self.assertCodeEqual(as_source_code(join), "'hello'") + + def test_string_join_two(self): + module = codegen.Module() + module.scope.reserve_name('tmp') + var = module.scope.variable('tmp') + join = codegen.StringJoin([codegen.String('hello '), var]) + self.assertCodeEqual(as_source_code(join), "''.join(['hello ', tmp])") + + def test_string_join_collapse_strings(self): + scope = codegen.Scope() + scope.reserve_name('tmp') + var = scope.variable('tmp') + join1 = codegen.StringJoin([codegen.String('hello '), + codegen.String('there '), + var, + codegen.String(' how'), + codegen.String(' are you?'), + ]) + join1 = codegen.simplify(join1) + self.assertCodeEqual(as_source_code(join1), "''.join(['hello there ', tmp, ' how are you?'])") + + def test_cleanup_name(self): + for n, c in [('abc-def()[]ghi,.<>¡!?¿', 'abcdefghi'), # illegal chars + ('1abc', 'n1abc'), # leading digit + ('_allowed', '_allowed'), # leading _ (which is allowed) + ('-', 'n') # empty after removing illegals + ]: + self.assertEqual(codegen.cleanup_name(n), c) + + @given(text()) + def test_cleanup_name_not_empty(self, t): + self.assertTrue(len(codegen.cleanup_name(t)) > 0, " for t = {!r}".format(t)) + + @given(text()) + def test_cleanup_name_allowed_identifier(self, t): + self.assertTrue(allowable_name(codegen.cleanup_name(t)), " for t = {!r}".format(t)) + + def test_dict_lookup(self): + scope = codegen.Scope() + scope.reserve_name('tmp') + var = scope.variable('tmp') + lookup 
= codegen.DictLookup(var, codegen.String('x')) + self.assertCodeEqual(as_source_code(lookup), "tmp['x']") + + def test_equals(self): + eq = codegen.Equals(codegen.String('x'), codegen.String('y')) + self.assertCodeEqual(as_source_code(eq), "'x' == 'y'") + + def test_or(self): + or_ = codegen.Or(codegen.String('x'), codegen.String('y')) + self.assertCodeEqual(as_source_code(or_), "'x' or 'y'") diff --git a/fluent.runtime/tests/test_compiler.py b/fluent.runtime/tests/test_compiler.py new file mode 100644 index 0000000..0cbfbff --- /dev/null +++ b/fluent.runtime/tests/test_compiler.py @@ -0,0 +1,758 @@ +from __future__ import absolute_import, unicode_literals + +import unittest + +from fluent.runtime import CompilingFluentBundle +from fluent.runtime.compiler import messages_to_module +from fluent.runtime.errors import FluentCyclicReferenceError, FluentFormatError, FluentReferenceError + +from .test_codegen import decompile_ast_list, normalize_python +from .utils import dedent_ftl + +# Some TDD tests to help develop CompilingFluentBundle. It should be possible to delete +# the tests here and still have complete test coverage of the compiler.py module, via +# the other FluentBundle.format tests. + + +def compile_messages_to_python(source, locale, use_isolating=False, functions=None): + # We use CompilingFluentBundle partially here, but then switch to + # messages_to_module instead of compile_messages so that we can get the AST + # back instead of a compiled function. 
+ bundle = CompilingFluentBundle([locale], use_isolating=use_isolating, functions=functions) + bundle.add_messages(dedent_ftl(source)) + module, message_mapping, module_globals, errors = messages_to_module( + bundle._messages_and_terms, bundle._babel_locale, + use_isolating=bundle.use_isolating, + functions=bundle._functions) + return decompile_ast_list([module.as_ast()]), errors + + +class TestCompiler(unittest.TestCase): + locale = 'en_US' + + maxDiff = None + + def assertCodeEqual(self, code1, code2): + self.assertEqual(normalize_python(code1), + normalize_python(code2)) + + def test_single_string_literal(self): + code, errs = compile_messages_to_python(""" + foo = Foo + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'Foo' + """) + self.assertEqual(errs, []) + + def test_string_literal_in_placeable(self): + code, errs = compile_messages_to_python(""" + foo = { "Foo" } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'Foo' + """) + self.assertEqual(errs, []) + + def test_number_literal(self): + code, errs = compile_messages_to_python(""" + foo = { 123 } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return NUMBER(123).format(locale) + """) + self.assertEqual(errs, []) + + def test_interpolated_number(self): + code, errs = compile_messages_to_python(""" + foo = x { 123 } y + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return ''.join(['x ', NUMBER(123).format(locale), ' y']) + """) + self.assertEqual(errs, []) + + def test_message_reference_plus_string_literal(self): + code, errs = compile_messages_to_python(""" + foo = Foo + bar = X { foo } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'Foo' + + def bar(message_args, errors): + return ''.join(['X ', foo(message_args, errors)]) + """) + self.assertEqual(errs, []) + + def 
test_single_message_reference(self): + code, errs = compile_messages_to_python(""" + foo = Foo + bar = { foo } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'Foo' + + def bar(message_args, errors): + return foo(message_args, errors) + """) + self.assertEqual(errs, []) + + def test_message_attr_reference(self): + code, errs = compile_messages_to_python(""" + foo = + .attr = Foo Attr + bar = { foo.attr } + """, self.locale) + self.assertCodeEqual(code, """ + def foo__attr(message_args, errors): + return 'Foo Attr' + + def bar(message_args, errors): + return foo__attr(message_args, errors) + """) + self.assertEqual(errs, []) + + def test_single_message_reference_reversed_order(self): + # We should cope with forward references + code, errs = compile_messages_to_python(""" + bar = { foo } + foo = Foo + """, self.locale) + self.assertCodeEqual(code, """ + def bar(message_args, errors): + return foo(message_args, errors) + + def foo(message_args, errors): + return 'Foo' + """) + self.assertEqual(errs, []) + + def test_single_message_bad_reference(self): + code, errs = compile_messages_to_python(""" + bar = { foo } + """, self.locale) + # We already know that foo does not exist, so we can hard code the error + # into the function for the runtime error. 
+ self.assertCodeEqual(code, """ + def bar(message_args, errors): + errors.append(FluentReferenceError('Unknown message: foo')) + return 'foo' + """) + # And we should get a compile time error: + self.assertEqual(errs, [('bar', FluentReferenceError("Unknown message: foo"))]) + + def test_name_collision_function_args(self): + code, errs = compile_messages_to_python(""" + errors = Errors + """, self.locale) + self.assertCodeEqual(code, """ + def errors2(message_args, errors): + return 'Errors' + """) + self.assertEqual(errs, []) + + def test_name_collision_builtins(self): + code, errs = compile_messages_to_python(""" + zip = Zip + """, self.locale) + self.assertCodeEqual(code, """ + def zip2(message_args, errors): + return 'Zip' + """) + self.assertEqual(errs, []) + + def test_name_collision_keyword(self): + code, errs = compile_messages_to_python(""" + class = Class + """, self.locale) + self.assertCodeEqual(code, """ + def class2(message_args, errors): + return 'Class' + """) + self.assertEqual(errs, []) + + def test_message_mapping_used(self): + # Checking that we actually use message_mapping when looking up the name + # of the message function to call. 
+ code, errs = compile_messages_to_python(""" + zip = Foo + str = { zip } + """, self.locale) + self.assertCodeEqual(code, """ + def zip2(message_args, errors): + return 'Foo' + + def str2(message_args, errors): + return zip2(message_args, errors) + """) + self.assertEqual(errs, []) + + def test_external_argument(self): + code, errs = compile_messages_to_python(""" + with-arg = { $arg } + """, self.locale) + self.assertCodeEqual(code, """ + def with_arg(message_args, errors): + try: + _arg = message_args['arg'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: arg')) + _arg = FluentNone('arg') + _arg_h = _arg + else: + _arg_h = handle_argument(_arg, 'arg', locale, errors) + return handle_output(_arg_h, locale, errors) + """) + self.assertEqual(errs, []) + + def test_function_call(self): + code, errs = compile_messages_to_python(""" + foo = { NUMBER(12345) } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return NUMBER(12345).format(locale) + """) + self.assertEqual(errs, []) + + def test_function_call_external_arg(self): + code, errs = compile_messages_to_python(""" + foo = { NUMBER($arg) } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + try: + _arg = message_args['arg'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: arg')) + _arg = FluentNone('arg') + _arg_h = _arg + else: + _arg_h = handle_argument(_arg, 'arg', locale, errors) + return NUMBER(_arg_h).format(locale) + """) + self.assertEqual(errs, []) + + def test_function_call_kwargs(self): + code, errs = compile_messages_to_python(""" + foo = { NUMBER(12345, useGrouping: 0) } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return NUMBER(12345, useGrouping=0).format(locale) + """) + self.assertEqual(errs, []) + + def test_missing_function_call(self): + code, errs = compile_messages_to_python(""" + foo = { 
MISSING(123) } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + errors.append(FluentReferenceError('Unknown function: MISSING')) + return 'MISSING()' + """), + self.assertEqual(errs, [('foo', FluentReferenceError('Unknown function: MISSING'))]) + + def test_function_call_with_bad_keyword_arg(self): + def MYFUNC(arg, kw1=None, kw2=None): + return arg + # Disallow 'kw2' arg + MYFUNC.ftl_arg_spec = (1, ['kw1']) + code, errs = compile_messages_to_python(""" + foo = { MYFUNC(123, kw2: 1) } + """, self.locale, functions={'MYFUNC': MYFUNC}) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + errors.append(TypeError('MYFUNC() got an unexpected keyword argument \\'kw2\\'')) + return handle_output(MYFUNC(NUMBER(123)), locale, errors) + """), + self.assertEqual(len(errs), 1) + self.assertEqual(errs[0][0], 'foo') + self.assertEqual(type(errs[0][1]), TypeError) + + def test_function_call_with_bad_positional_arg(self): + def MYFUNC(): + return '' + code, errs = compile_messages_to_python(""" + foo = { MYFUNC(123) } + """, self.locale, functions={'MYFUNC': MYFUNC}) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + errors.append(TypeError('MYFUNC() takes 0 positional arguments but 1 were given')) + return handle_output(MYFUNC(), locale, errors) + """), + self.assertEqual(len(errs), 1) + self.assertEqual(errs[0][0], 'foo') + self.assertEqual(type(errs[0][1]), TypeError) + + def test_function_defined_with_bad_kwargs(self): + def MYFUNC(): + return '' + MYFUNC.ftl_arg_spec = (0, ['allowable-kwarg', 'invalid kwarg name']) + + code, errs = compile_messages_to_python(""" + foo = { MYFUNC() } + """, self.locale, functions={'MYFUNC': MYFUNC}) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return handle_output(MYFUNC(), locale, errors) + """), + self.assertEqual(errs, + [(None, FluentFormatError("MYFUNC() has invalid keyword argument name 'invalid kwarg name'"))]) + + def 
test_function_called_with_disallowed_kwarg(self): + def MYFUNC(arg=None): + return '' + MYFUNC.ftl_arg_spec = (0, ['arg']) + + code, errs = compile_messages_to_python(""" + foo = { MYFUNC(other: 123) } + """, self.locale, functions={'MYFUNC': MYFUNC}) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + errors.append(TypeError('MYFUNC() got an unexpected keyword argument \\'other\\'')) + return handle_output(MYFUNC(), locale, errors) + """), + self.assertEqual(len(errs), 1) + self.assertEqual(type(errs[0][1]), TypeError) + self.assertEqual(errs[0][1].args[0], "MYFUNC() got an unexpected keyword argument 'other'") + + def test_function_called_with_non_identifier_kwarg(self): + def MYFUNC(**kwargs): + return '' + + code, errs = compile_messages_to_python(""" + foo = { MYFUNC(non-identifier-name: "x") } + """, self.locale, functions={'MYFUNC': MYFUNC}) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return handle_output(MYFUNC(**{'non-identifier-name': 'x'}), locale, errors) + """), + self.assertEqual(errs, []) + + def test_message_with_attrs(self): + code, errs = compile_messages_to_python(""" + foo = Foo + .attr-1 = Attr 1 + .attr-2 = Attr 2 + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'Foo' + + def foo__attr_1(message_args, errors): + return 'Attr 1' + + def foo__attr_2(message_args, errors): + return 'Attr 2' + """) + self.assertEqual(errs, []) + + def test_term_inline(self): + code, errs = compile_messages_to_python(""" + -term = Term + message = Message { -term } + """, self.locale) + self.assertCodeEqual(code, """ + def message(message_args, errors): + return 'Message Term' + """) + + def test_select_string_runtime(self): + code, errs = compile_messages_to_python(""" + foo = { $arg -> + [a] A + *[b] B + } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + try: + _arg = message_args['arg'] + except (LookupError, TypeError): + 
errors.append(FluentReferenceError('Unknown external: arg')) + _arg = FluentNone('arg') + if _arg == 'a': + _ret = 'A' + else: + _ret = 'B' + return _ret + """) + self.assertEqual(errs, []) + + def test_select_string_static(self): + code, errs = compile_messages_to_python(""" + foo = { "a" -> + [a] A + *[b] B + } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'A' + """) + self.assertEqual(errs, []) + + def test_select_number_static(self): + code, errs = compile_messages_to_python(""" + foo = { 1 -> + [1] One + *[2] Two + } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'One' + """) + self.assertEqual(errs, []) + + def test_select_number_runtime(self): + code, errs = compile_messages_to_python(""" + foo = { $arg -> + [1] One + *[2] { 2 } + } + """, self.locale) + # We should not get 'NUMBER' calls in the select expression or + # the key comparisons, but we should get them for the select value + # for { 2 }. 
+ self.assertCodeEqual(code, """ + def foo(message_args, errors): + try: + _arg = message_args['arg'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: arg')) + _arg = FluentNone('arg') + if _arg == 1: + _ret = 'One' + else: + _ret = NUMBER(2).format(locale) + return _ret + """) + self.assertEqual(errs, []) + + def test_select_plural_category_with_literal(self): + code, errs = compile_messages_to_python(""" + foo = { 1 -> + [one] One + *[other] Other + } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'One' + """) + self.assertEqual(errs, []) + + def test_select_plural_category_with_arg(self): + code, errs = compile_messages_to_python(""" + foo = { $count -> + [0] You have nothing + [one] You have one thing + *[other] You have some things + } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + try: + _arg = message_args['count'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: count')) + _arg = FluentNone('count') + _plural_form = plural_form_for_number(_arg) + if _arg == 0: + _ret = 'You have nothing' + elif _arg == 'one' or _plural_form == 'one': + _ret = 'You have one thing' + else: + _ret = 'You have some things' + return _ret + """) + self.assertEqual(errs, []) + + def test_combine_strings(self): + code, errs = compile_messages_to_python(""" + foo = Start { "Middle" } End + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'Start Middle End' + """) + self.assertEqual(errs, []) + + def test_single_string_literal_isolating(self): + code, errs = compile_messages_to_python(""" + foo = Foo + """, self.locale, use_isolating=True) + # No isolating chars, because we have no placeables. 
+ self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'Foo' + """) + self.assertEqual(errs, []) + + def test_interpolation_isolating(self): + code, errs = compile_messages_to_python(""" + foo = Foo { $arg } Bar + """, self.locale, use_isolating=True) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + try: + _arg = message_args['arg'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: arg')) + _arg = FluentNone('arg') + _arg_h = _arg + else: + _arg_h = handle_argument(_arg, 'arg', locale, errors) + return ''.join(['Foo \\u2068', handle_output(_arg_h, locale, errors), '\\u2069 Bar']) + """) + self.assertEqual(errs, []) + + def test_cycle_detection(self): + code, errs = compile_messages_to_python(""" + foo = { foo } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + errors.append(FluentCyclicReferenceError('Cyclic reference in foo')) + return '???' + """) + self.assertEqual(errs, [('foo', FluentCyclicReferenceError("Cyclic reference in foo"))]) + + def test_cycle_detection_false_positive_1(self): + # Test for a bug in early version of cycle detector + code, errs = compile_messages_to_python(""" + foo = { -bar }{ -bar } + -bar = Bar + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + return 'BarBar' + """) + self.assertEqual(errs, []) + + def test_cycle_detection_with_attrs(self): + code, errs = compile_messages_to_python(""" + foo = + .attr1 = { bar.attr2 } + + bar = + .attr2 = { foo.attr1 } + """, self.locale) + self.assertCodeEqual(code, """ + def foo__attr1(message_args, errors): + errors.append(FluentCyclicReferenceError('Cyclic reference in foo.attr1')) + return '???' + + def bar__attr2(message_args, errors): + errors.append(FluentCyclicReferenceError('Cyclic reference in bar.attr2')) + return '???' 
+ """) + self.assertEqual(errs, [('foo.attr1', FluentCyclicReferenceError("Cyclic reference in foo.attr1")), + ('bar.attr2', FluentCyclicReferenceError("Cyclic reference in bar.attr2")), + ]) + + def test_term_cycle_detection(self): + code, errs = compile_messages_to_python(""" + -cyclic-term = { -cyclic-term } + cyclic-term-message = { -cyclic-term } + """, self.locale) + self.assertCodeEqual(code, """ + def cyclic_term_message(message_args, errors): + errors.append(FluentCyclicReferenceError('Cyclic reference in cyclic-term-message')) + return '???' + """) + self.assertEqual(errs, [('cyclic-term-message', + FluentCyclicReferenceError("Cyclic reference in cyclic-term-message")), + ]) + + def test_cycle_detection_with_unknown_attr(self): + # unknown attributes fall back to main message, which brings + # another option for a cycle. + code, errs = compile_messages_to_python(""" + foo = { bar.bad-attr } + + bar = { foo } + """, self.locale) + self.assertCodeEqual(code, """ + def foo(message_args, errors): + errors.append(FluentCyclicReferenceError('Cyclic reference in foo')) + return '???' + + def bar(message_args, errors): + errors.append(FluentCyclicReferenceError('Cyclic reference in bar')) + return '???' 
+ """) + self.assertEqual(errs, [('foo', FluentCyclicReferenceError("Cyclic reference in foo")), + ('bar', FluentCyclicReferenceError("Cyclic reference in bar")), + ]) + + def test_parameterized_terms_inlined_for_string(self): + code, errs = compile_messages_to_python(""" + -thing = { $article -> + *[definite] the thing + [indefinite] a thing + } + the-thing = { -thing } + a-thing = { -thing(article: "indefinite") } + """, self.locale) + # select expression should be statically evaluated and inlined + self.assertCodeEqual(code, """ + def the_thing(message_args, errors): + return 'the thing' + + def a_thing(message_args, errors): + return 'a thing' + """) + + def test_parameterized_terms_inlined_for_number(self): + code, errs = compile_messages_to_python(""" + -thing = { $count -> + [1] a thing + *[2] some things + } + some-things = { -thing } + a-thing = { -thing(count: 1) } + """, self.locale) + # select expression should be statically evaluated and inlined + self.assertCodeEqual(code, """ + def some_things(message_args, errors): + return 'some things' + + def a_thing(message_args, errors): + return 'a thing' + """) + + def test_parameterized_terms_inlined_with_complex_selector(self): + code, errs = compile_messages_to_python(""" + -brand = Cool Thing + .status = { $version -> + [v2] available + *[v1] deprecated + } + + attr-with-arg = { -brand } is { -brand.status(version: "v2") -> + [available] available, yay! + *[deprecated] deprecated, sorry + } + """, self.locale) + self.assertCodeEqual(code, """ + def attr_with_arg(message_args, errors): + return 'Cool Thing is available, yay!' + """) + + def test_message_call_from_inside_term(self): + # This might get removed sometime, but for now it is a corner case we + # need to cover. 
+ code, errs = compile_messages_to_python(""" + outer-message = { -term(a: 1, b: "hello") } + -term = Term { inner-message } + inner-message = { $a } { $b } + """, self.locale) + # outer-message should pass term args, not external args + self.assertCodeEqual(code, """ + def outer_message(message_args, errors): + return ''.join(['Term ', inner_message({'a': NUMBER(1), 'b': 'hello'}, errors)]) + + def inner_message(message_args, errors): + try: + _arg = message_args['a'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: a')) + _arg = FluentNone('a') + _arg_h = _arg + else: + _arg_h = handle_argument(_arg, 'a', locale, errors) + try: + _arg2 = message_args['b'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: b')) + _arg2 = FluentNone('b') + _arg_h2 = _arg2 + else: + _arg_h2 = handle_argument(_arg2, 'b', locale, errors) + return ''.join( + [handle_output(_arg_h, locale, errors), ' ', handle_output(_arg_h2, locale, errors)] + ) + """) + + def test_reuse_external_arguments(self): + code, errs = compile_messages_to_python(""" + foo = { $arg -> + [0] You have no items + [1] You have one item + *[2] You have { NUMBER($arg) } items + } + """, self.locale) + # We should re-use the work of getting $arg out of args and + # not do that twice. 
+ self.assertCodeEqual(code, """ + def foo(message_args, errors): + try: + _arg = message_args['arg'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: arg')) + _arg = FluentNone('arg') + if _arg == 0: + _ret = 'You have no items' + elif _arg == 1: + _ret = 'You have one item' + else: + _arg_h = handle_argument(_arg, 'arg', locale, errors) + _ret = ''.join(['You have ', NUMBER(_arg_h).format(locale), ' items']) + return _ret + """) + self.assertEqual(errs, []) + + def test_reuse_external_arguments_multiple_branches(self): + code, errs = compile_messages_to_python(""" + foo = { $lookup -> + [a] { $foo } + [b] B + *[c] { $foo } + } + """, self.locale) + # We should only do 'foo' lookup for first and last branch, for efficiency. + # But we need to be aware that in last branch, we haven't already looked + # up 'foo', despite the fact that we did it in an earlier branch. + self.assertCodeEqual(code, """ + def foo(message_args, errors): + try: + _arg = message_args['lookup'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: lookup')) + _arg = FluentNone('lookup') + if _arg == 'a': + try: + _arg2 = message_args['foo'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: foo')) + _arg2 = FluentNone('foo') + _arg_h = _arg2 + else: + _arg_h = handle_argument(_arg2, 'foo', locale, errors) + _ret = handle_output(_arg_h, locale, errors) + elif _arg == 'b': + _ret = 'B' + else: + try: + _arg3 = message_args['foo'] + except (LookupError, TypeError): + errors.append(FluentReferenceError('Unknown external: foo')) + _arg3 = FluentNone('foo') + _arg_h2 = _arg3 + else: + _arg_h2 = handle_argument(_arg3, 'foo', locale, errors) + _ret = handle_output(_arg_h2, locale, errors) + return _ret + """) + self.assertEqual(errs, []) diff --git a/fluent.runtime/tools/benchmarks/gettext_comparisons.py b/fluent.runtime/tools/benchmarks/gettext_comparisons.py new file mode 100755 
index 0000000..60517ee --- /dev/null +++ b/fluent.runtime/tools/benchmarks/gettext_comparisons.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python + +# This should be run using pytest, see end of file +from __future__ import unicode_literals + +import os +import subprocess +import sys +from gettext import translation + +import pytest +import six + +from fluent.runtime import CompilingFluentBundle, InterpretingFluentBundle + +this_file = os.path.abspath(__file__) +this_dir = os.path.dirname(this_file) +locale_dir = os.path.join(this_dir, "locale") +messages_dir = os.path.join(locale_dir, "pl", "LC_MESSAGES") +FTL_MESSAGES = """ +single-string-literal = Hello I am a single string literal in Polish + +single-interpolation = Hello { $username }, welcome to our website! in Polish + +# Don't include the count in the output, to test just the speed of the plural +# form lookup, rather than the locale aware number formatting routines. + +plural-form-select = { $count -> + [one] There is one thing, in Polish + [few] There are few things, in Polish + [many] There are many things, in Polish + *[other] There are other things, in Polish + } +""" + + +@pytest.fixture(scope="module") +def gettext_translations(): + pot_file = os.path.join(this_dir, "benchmark.pot") + po_file = os.path.join(messages_dir, "benchmark.po") + if not os.path.exists(messages_dir): + os.makedirs(messages_dir) + subprocess.check_call(["pybabel", "extract", "-o", pot_file, this_file]) + do_dummy_translation(pot_file, po_file) + + mo_file = os.path.join(messages_dir, "benchmark.mo") + subprocess.check_call(["pybabel", "compile", "-f", "-i", po_file, "-o", mo_file]) + translation_obj = translation("benchmark", localedir=locale_dir, languages=['pl']) + return translation_obj + + +dummy_gettext_plural_translations = { + "There is %(count)d thing": + ["There is one thing, in Polish", + "There are few things, in Polish", + "There are many things, in Polish", + "There are other things, in Polish", + ] +} + + +def 
do_dummy_translation(pot_file, po_file): + # Copy and fill in some default translations + with open(pot_file, "r") as f: + contents = f.read() + output = [] + last_id = None + for line in contents.split("\n"): + if line.startswith("msgid \""): + last_id = line.replace("msgid ", "").strip('"') + + if line.startswith("msgstr "): + # Generate 'translation': + msgstr = 'msgstr "{0} in Polish"'.format(last_id) + output.append(msgstr) + elif line.startswith('msgstr[0]'): + msgstrs = dummy_gettext_plural_translations[last_id] + for i, msgstr in enumerate(msgstrs): + output.append('''msgstr[{0}] "{1}"'''.format(i, msgstr)) + elif line.startswith('msgstr['): + pass # ignore, done these already + else: + output.append(line) + + if line.startswith('"Generated-By:'): + # extra header stuff: + output.append(r'''"Language: pl\n"''') + output.append(r'''"Plural-Forms: nplurals=4; plural=(n==1 ? 0 : (n%10>=2 && n%10<=4) && (n%100<12 || n%100>=14) ? 1 : n!=1 && (n%10>=0 && n%10<=1) || (n%10>=5 && n%10<=9) || (n%100>=12 && n%100<=14) ? 
2 : 3);\n"''') + + with open(po_file, "w") as f: + f.write("\n".join(output)) + + +@pytest.fixture +def interpreting_fluent_bundle(): + return build_fluent_bundle(InterpretingFluentBundle) + + +@pytest.fixture +def compiling_fluent_bundle(): + ctx = build_fluent_bundle(CompilingFluentBundle) + ctx._compile() + return ctx + + +def build_fluent_bundle(cls): + # We choose 'use_isolating=False' for feature parity with gettext + ctx = cls(['pl'], use_isolating=False) + ctx.add_messages(FTL_MESSAGES) + return ctx + + +def unicode_gettext_method(gettext_translations): + if hasattr(gettext_translations, 'ugettext'): + return gettext_translations.ugettext + else: + return gettext_translations.gettext + + +def unicode_ngettext_method(gettext_translations): + if hasattr(gettext_translations, 'ungettext'): + return gettext_translations.ungettext + else: + return gettext_translations.ngettext + + +def test_single_string_gettext(gettext_translations, benchmark): + gettext_translations.gettext("Hello I am a single string literal") # for extract process + result = benchmark(unicode_gettext_method(gettext_translations), "Hello I am a single string literal") + assert result == "Hello I am a single string literal in Polish" + assert type(result) is six.text_type + + +def test_single_string_fluent_interpreter(interpreting_fluent_bundle, benchmark): + result = benchmark(interpreting_fluent_bundle.format, 'single-string-literal') + assert result[0] == "Hello I am a single string literal in Polish" + assert type(result[0]) is six.text_type + + +def test_single_string_fluent_compiler(compiling_fluent_bundle, benchmark): + result = benchmark(compiling_fluent_bundle.format, 'single-string-literal') + assert result[0] == "Hello I am a single string literal in Polish" + assert type(result[0]) is six.text_type + + +def test_single_interpolation_gettext(gettext_translations, benchmark): + gettext_translations.gettext("Hello %(username)s, welcome to our website!") # for extract process + t = 
unicode_gettext_method(gettext_translations) + args = {'username': 'Mary'} + result = benchmark(lambda: t("Hello %(username)s, welcome to our website!") % args) + assert result == "Hello Mary, welcome to our website! in Polish" + assert type(result) is six.text_type + + +def test_single_interpolation_fluent_interpreter(interpreting_fluent_bundle, benchmark): + args = {'username': 'Mary'} + result = benchmark(interpreting_fluent_bundle.format, 'single-interpolation', args) + assert result[0] == "Hello Mary, welcome to our website! in Polish" + assert type(result[0]) is six.text_type + + +def test_single_interpolation_fluent_compiler(compiling_fluent_bundle, benchmark): + args = {'username': 'Mary'} + result = benchmark(compiling_fluent_bundle.format, 'single-interpolation', args) + assert result[0] == "Hello Mary, welcome to our website! in Polish" + assert type(result[0]) is six.text_type + + +def test_plural_form_select_gettext(gettext_translations, benchmark): + gettext_translations.ngettext("There is %(count)d thing", "There are %(count)d things", 1) # for extract process + t = unicode_ngettext_method(gettext_translations) + + def f(): + for i in range(0, 10): + t("There is %(count)d thing", "There are %(count)d things", i) + + benchmark(f) + + +def test_plural_form_select_fluent_compiler(compiling_fluent_bundle, benchmark): + return _test_plural_form_select_fluent(compiling_fluent_bundle, benchmark) + + +def test_plural_form_select_fluent_interpreter(interpreting_fluent_bundle, benchmark): + return _test_plural_form_select_fluent(interpreting_fluent_bundle, benchmark) + + +def _test_plural_form_select_fluent(ctx, benchmark): + def f(): + for i in range(0, 10): + ctx.format("plural-form-select", {'count': i})[0] + + benchmark(f) + + +if __name__ == '__main__': + # You can execute this file directly, and optionally add more py.test args + # to the command line (e.g. -k for keyword matching certain tests). 
+ subprocess.check_call(["py.test", "--benchmark-warmup=on", "--benchmark-sort=name", this_file] + sys.argv[1:]) diff --git a/fluent.runtime/tox.ini b/fluent.runtime/tox.ini index 5d2553b..04f864b 100644 --- a/fluent.runtime/tox.ini +++ b/fluent.runtime/tox.ini @@ -7,12 +7,19 @@ skipsdist=True setenv = PYTHONPATH = {toxinidir} deps = + # Just '.[develop]' would be nice here. + # Unfortunately it is super slow: https://github.com/pypa/pip/issues/2195 + # So we duplicate deps from setup.py for now. syntax0.15: fluent.syntax==0.15 syntax0.14: fluent.syntax==0.14 attrs==19.1.0 babel==2.6.0 pytz==2018.9 six==1.12.0 + ast_decompiler>=0.3.2 + hypothesis>=4.9.0 + py27: singledispatch + pypy: singledispatch commands = ./runtests.py [testenv:latest]