Skip to content

Commit

Permalink
Merge pull request #5184 from StackStorm/v3.4-updates
Browse files Browse the repository at this point in the history
Updates for v3.4.1
  • Loading branch information
blag authored Mar 12, 2021
2 parents cc21f8a + fc775b4 commit ccf33f9
Show file tree
Hide file tree
Showing 6 changed files with 532 additions and 23 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ in development
--------------


* Service start up code has been updated to log a warning if a non-utf-8 encoding / locale is
detected.

Using non-utf-8 locale while working with unicode data will result in various issues so users
are strongly recommended to ensure encoding for all the StackStorm service is
set to ``utf-8``. (#5182)

Contributed by @Kami.

3.4.0 - March 02, 2021
----------------------

Expand Down
58 changes: 56 additions & 2 deletions st2common/st2common/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,24 @@
'audit'
]

# True if sys.stdout should be patched and re-opened with utf-8 encoding in situations where
# utf-8 encoding is not used (if we don't do that and a logger tries to log a unicode string,
# log format would go in an infinite loop).
# We only expose this variable for testing purposes
PATCH_STDOUT = os.environ.get("ST2_LOG_PATCH_STDOUT", "true").lower() in [
"true",
"1",
"yes",
]

LOG = logging.getLogger(__name__)

# Note: This attribute is used by "find_caller" so it can correctly exclude this file when looking
# for the logger method caller frame.
_srcfile = get_normalized_file_path(__file__)

_original_stdstderr = sys.stderr


def find_caller(stack_info=False, stacklevel=1):
"""
Expand Down Expand Up @@ -197,6 +211,13 @@ def __init__(self, name, level=logging.ERROR):
self._level = level

def write(self, message):
# Work around for infinite loop issue - ensure we don't log unicode data.
# If message contains unicode sequences and process locale is not set to utf-8, it would
# result in infinite lop in _log on formatting the message.
# This is because of the differences between Python 2.7 and Python 3 with log error
# handlers.
message = message.encode("utf-8", "replace").decode("ascii", "ignore")

self._logger._log(self._level, message, None)

def flush(self):
Expand All @@ -222,8 +243,40 @@ def _redirect_stderr():
sys.stderr = LoggingStream('STDERR')


def setup(config_file, redirect_stderr=True, excludes=None, disable_existing_loggers=False,
st2_conf_path=None):
def _patch_stdout():
"""
This function re-opens sys.stdout using utf-8 encoding.
It's to be used in situations where process encoding / locale is not set to utf-8. In such
situations when unicode sequence is logged, it would cause logging formatter to go in an infite
loop on formatting a record.
This function works around that by ensuring sys.stdout is always opened in utf-8 mode.
"""

stdout_encoding = getattr(sys.stdout, "encoding", "none")

if stdout_encoding not in ["utf8", "utf-8"] and PATCH_STDOUT:
LOG.info(
"Patching sys.stdout and re-opening it with utf-8 encoding (originally opened "
"with %s encoding)..." % (stdout_encoding)
)
sys.stdout = open(
sys.stdout.fileno(),
mode="w",
encoding="utf-8",
errors="replace",
buffering=1,
)


def setup(
config_file,
redirect_stderr=True,
excludes=None,
disable_existing_loggers=False,
st2_conf_path=None,
):
"""
Configure logging from file.
Expand All @@ -246,6 +299,7 @@ def setup(config_file, redirect_stderr=True, excludes=None, disable_existing_log
_add_exclusion_filters(handlers=handlers, excludes=excludes)
if redirect_stderr:
_redirect_stderr()
_patch_stdout()
except Exception as exc:
exc_cls = type(exc)
tb_msg = traceback.format_exc()
Expand Down
76 changes: 70 additions & 6 deletions st2common/st2common/service_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import os
import sys
import traceback
import locale
import logging as stdlib_logging

import six
Expand Down Expand Up @@ -62,6 +63,19 @@
'register_service_in_service_registry'
]

# Message which is logged if non utf-8 locale is detected on startup.
NON_UTF8_LOCALE_WARNING_MSG = """
Detected a non utf-8 locale / encoding (fs encoding: %s, default encoding: %s, locale: %s).
Using non utf-8 locale while working with unicode data will result in exceptions and undefined
behavior.
You are strongly encouraged to configure all the StackStorm services to use utf-8 encoding (e.g.
LANG=en_US.UTF-8).
""".strip().replace(
"\n", " "
)

VALID_UTF8_ENCODINGS = ["utf8", "utf-8"]

LOG = logging.getLogger(__name__)


Expand Down Expand Up @@ -99,20 +113,61 @@ def setup(service, config, setup_db=True, register_mq_exchanges=True,
else:
config.parse_args()

version = '%s.%s.%s' % (sys.version_info[0], sys.version_info[1], sys.version_info[2])
LOG.debug('Using Python: %s (%s)' % (version, sys.executable))
version = "%s.%s.%s" % (
sys.version_info[0],
sys.version_info[1],
sys.version_info[2],
)

# We print locale related info to make it easier to troubleshoot issues where locale is not
# set correctly (e.g. using C / ascii, but services are trying to work with unicode data
# would result in things blowing up)

fs_encoding = sys.getfilesystemencoding()
default_encoding = sys.getdefaultencoding()
lang_env = os.environ.get("LANG", "notset")
pythonioencoding_env = os.environ.get("PYTHONIOENCODING", "notset")

try:
language_code, encoding = locale.getdefaultlocale()

if language_code and encoding:
used_locale = ".".join([language_code, encoding])
else:
used_locale = "unable to retrieve locale"
except Exception as e:
language_code, encoding = "unknown", "unknown"
used_locale = "unable to retrieve locale: %s " % (str(e))

LOG.info("Using Python: %s (%s)" % (version, sys.executable))
LOG.info(
"Using fs encoding: %s, default encoding: %s, locale: %s, LANG env variable: %s, "
"PYTHONIOENCODING env variable: %s"
% (fs_encoding, default_encoding, lang_env, used_locale, pythonioencoding_env)
)

config_file_paths = cfg.CONF.config_file
config_file_paths = [os.path.abspath(path) for path in config_file_paths]
LOG.debug('Using config files: %s', ','.join(config_file_paths))
LOG.info("Using config files: %s", ",".join(config_file_paths))

# Setup logging.
logging_config_path = config.get_logging_config_path()
logging_config_path = os.path.abspath(logging_config_path)

LOG.debug('Using logging config: %s', logging_config_path)
LOG.info("Using logging config: %s", logging_config_path)

# Warn on non utf-8 locale which could cause issues when running under Python 3 and working
# with unicode data
if (
fs_encoding.lower() not in VALID_UTF8_ENCODINGS or
encoding.lower() not in VALID_UTF8_ENCODINGS
):
LOG.warning(
NON_UTF8_LOCALE_WARNING_MSG
% (fs_encoding, default_encoding, used_locale.strip())
)

is_debug_enabled = (cfg.CONF.debug or cfg.CONF.system.debug)
is_debug_enabled = cfg.CONF.debug or cfg.CONF.system.debug

try:
logging.setup(logging_config_path, redirect_stderr=cfg.CONF.log.redirect_stderr,
Expand All @@ -137,7 +192,16 @@ def setup(service, config, setup_db=True, register_mq_exchanges=True,
ignore_audit_log_messages = (handler.level >= stdlib_logging.INFO and
handler.level < stdlib_logging.AUDIT)
if not is_debug_enabled and ignore_audit_log_messages:
LOG.debug('Excluding log messages with level "AUDIT" for handler "%s"' % (handler))
try:
handler_repr = str(handler)
except TypeError:
# In case handler doesn't have name assigned, repr would throw
handler_repr = "unknown"

LOG.debug(
'Excluding log messages with level "AUDIT" for handler "%s"'
% (handler_repr)
)
handler.addFilter(LogLevelFilter(log_levels=exclude_log_levels))

if not is_debug_enabled:
Expand Down
86 changes: 86 additions & 0 deletions st2common/tests/integration/log_unicode_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
# Copyright 2020 The StackStorm Authors.
# Copyright 2019 Extreme Networks, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This file is used to test edge case with logging unicode data.
"""

from __future__ import absolute_import

import os
import sys

from oslo_config import cfg

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

ST2ACTIONS_PATH = os.path.join(BASE_DIR, "../../../st2actions")
ST2COMMON_PATH = os.path.join(BASE_DIR, "../../")
ST2TESTS_PATH = os.path.join(BASE_DIR, "../../../st2tests")

# Ensure st2actions is in PYTHONPATH.
# This is needed since this script is spawned from inside integration tests
sys.path.append(ST2ACTIONS_PATH)
sys.path.append(ST2COMMON_PATH)
sys.path.append(ST2TESTS_PATH)

from st2actions.notifier import config
from st2common import log as logging
from st2common.service_setup import setup as common_setup

FIXTURES_DIR = os.path.join(ST2TESTS_PATH, "st2tests/fixtures")
ST2_CONFIG_DEBUG_LL_PATH = os.path.join(
FIXTURES_DIR, "conf/st2.tests.api.debug_log_level.conf"
)

LOG = logging.getLogger(__name__)


def main():
cfg.CONF.set_override("debug", True)
common_setup(
service="test",
config=config,
setup_db=False,
run_migrations=False,
register_runners=False,
register_internal_trigger_types=False,
register_mq_exchanges=False,
register_signal_handlers=False,
service_registry=False,
config_args=["--config-file", ST2_CONFIG_DEBUG_LL_PATH],
)

LOG.info("Test info message 1")
LOG.debug("Test debug message 1")

# 1. Actual unicode sequence
LOG.info("Test info message with unicode 1 - 好好好")
LOG.debug("Test debug message with unicode 1 - 好好好")

# 2. Ascii escape sequence
LOG.info(
"Test info message with unicode 1 - " +
"好好好".encode("ascii", "backslashreplace").decode("ascii", "backslashreplace")
)
LOG.debug(
"Test debug message with unicode 1 - " +
"好好好".encode("ascii", "backslashreplace").decode("ascii", "backslashreplace")
)


if __name__ == "__main__":
main()
Loading

0 comments on commit ccf33f9

Please sign in to comment.