Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix UnicodeDecodeError when postgres is not configured in English #4253

Merged
merged 3 commits into from
Dec 4, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/4253.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix UnicodeDecodeError when postgres is configured to give non-English errors
15 changes: 8 additions & 7 deletions synapse/storage/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from synapse.storage.engines import PostgresEngine
from synapse.util.caches.descriptors import Cache
from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
from synapse.util.stringutils import exception_to_unicode

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -249,32 +250,32 @@ def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks,
except self.database_engine.module.OperationalError as e:
# This can happen if the database disappears mid
# transaction.
logger.warn(
logger.warning(
"[TXN OPERROR] {%s} %s %d/%d",
name, e, i, N
name, exception_to_unicode(e), i, N
)
if i < N:
i += 1
try:
conn.rollback()
except self.database_engine.module.Error as e1:
logger.warn(
logger.warning(
"[TXN EROLL] {%s} %s",
name, e1,
name, exception_to_unicode(e1),
)
continue
raise
except self.database_engine.module.DatabaseError as e:
if self.database_engine.is_deadlock(e):
logger.warn("[TXN DEADLOCK] {%s} %d/%d", name, i, N)
logger.warning("[TXN DEADLOCK] {%s} %d/%d", name, i, N)
if i < N:
i += 1
try:
conn.rollback()
except self.database_engine.module.Error as e1:
logger.warn(
logger.warning(
"[TXN EROLL] {%s} %s",
name, e1,
name, exception_to_unicode(e1),
)
continue
raise
Expand Down
39 changes: 38 additions & 1 deletion synapse/util/stringutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
import random
import string

from six import PY3
import six
from six import PY2, PY3
from six.moves import range

_string_with_symbols = (
Expand Down Expand Up @@ -71,3 +72,39 @@ def to_ascii(s):
return s.encode("ascii")
except UnicodeEncodeError:
return s


def exception_to_unicode(e):
    """Helper function to extract the text of an exception as a unicode string

    Args:
        e (Exception): exception to be stringified

    Returns:
        unicode: the exception text. Under Python 3 this is simply ``str(e)``.
            Under Python 2 it is derived from ``e.args``: an empty string when
            there are no args, the ``repr`` of the args tuple when there is
            more than one, and otherwise the single argument (decoded as UTF-8
            with replacement characters if it is a byte string).
    """
    # urgh, this is a mess. The basic problem here is that psycopg2 constructs its
    # exceptions with PyErr_SetString, with a (possibly non-ascii) argument. str() will
    # then produce the raw byte sequence. Under Python 2, this will then cause another
    # error if it gets mixed with a `unicode` object, as per
    # https://github.com/matrix-org/synapse/issues/4252

    # First of all, if we're under python3, everything is fine because it will sort this
    # nonsense out for us.
    if not PY2:
        return str(e)

    # otherwise let's have a stab at decoding the exception message. We'll circumvent
    # Exception.__str__(), which would explode if someone raised Exception(u'non-ascii')
    # and instead look at what is in the args member.
    if not e.args:
        return u""
    if len(e.args) > 1:
        return six.text_type(repr(e.args))

    # Exactly one argument: take the argument itself, not the enclosing tuple,
    # otherwise the bytes check below can never match.
    msg = e.args[0]
    if isinstance(msg, bytes):
        # Byte string of unknown encoding: decode leniently so that logging the
        # error can never itself raise a UnicodeDecodeError.
        return msg.decode('utf-8', errors='replace')
    if isinstance(msg, six.text_type):
        return msg

    # Some other object (int, None, ...): go via repr(), as for the multi-arg
    # case, so that the documented unicode return type always holds.
    return six.text_type(repr(msg))