From b143f720f6d61a90c268673db142df948dd81621 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Sep 2021 15:32:03 +0100 Subject: [PATCH 1/4] Use `execute_values` more in PostgreSQL `execute_values` is a faster version of `execute_batch`. --- synapse/storage/database.py | 61 +++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 95d2caff628c..e432e998f597 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -280,18 +280,18 @@ def execute_batch(self, sql: str, args: Iterable[Iterable[Any]]) -> None: else: self.executemany(sql, args) - def execute_values(self, sql: str, *args: Any) -> List[Tuple]: + def execute_values(self, sql: str, *args: Any, fetch: bool = True) -> List[Tuple]: """Corresponds to psycopg2.extras.execute_values. Only available when using postgres. - Always sets fetch=True when caling `execute_values`, so will return the - results. + The `fetch` parameter must be set to False if the query does not return + rows (e.g. INSERTs). """ assert isinstance(self.database_engine, PostgresEngine) from psycopg2.extras import execute_values # type: ignore return self._do_execute( - lambda *x: execute_values(self.txn, *x, fetch=True), sql, *args + lambda *x: execute_values(self.txn, *x, fetch=fetch), sql, *args ) def execute(self, sql: str, *args: Any) -> None: @@ -920,13 +920,23 @@ def simple_insert_many_txn( if k != keys[0]: raise RuntimeError("All items must have the same keys") - sql = "INSERT INTO %s (%s) VALUES(%s)" % ( - table, - ", ".join(k for k in keys[0]), - ", ".join("?" for _ in keys[0]), - ) + if isinstance(txn.database_engine, PostgresEngine): + # We use `execute_values` for postgres as it can be a lot faster + # than `execute_batch`, but it's only available on postgres. + sql = "INSERT INTO %s (%s) VALUES ?" % ( + table, + ", ".join(k for k in keys[0]), + ) - txn.execute_batch(sql, vals) + txn.execute_values(sql, vals, fetch=False) + else: + sql = "INSERT INTO %s (%s) VALUES(%s)" % ( + table, + ", ".join(k for k in keys[0]), + ", ".join("?" for _ in keys[0]), + ) + + txn.execute_batch(sql, vals) async def simple_upsert( self, @@ -1281,20 +1291,33 @@ def simple_upsert_many_txn_native_upsert( k + "=EXCLUDED." + k for k in value_names ) - sql = "INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) DO %s" % ( - table, - ", ".join(k for k in allnames), - ", ".join("?" for _ in allnames), - ", ".join(key_names), - latter, - ) - args = [] for x, y in zip(key_values, value_values): args.append(tuple(x) + tuple(y)) - return txn.execute_batch(sql, args) + if isinstance(txn.database_engine, PostgresEngine): + # We use `execute_values` for postgres as it can be a lot faster + # than `execute_batch`, but it's only available on postgres. + sql = "INSERT INTO %s (%s) VALUES ? ON CONFLICT (%s) DO %s" % ( + table, + ", ".join(k for k in allnames), + ", ".join(key_names), + latter, + ) + + txn.execute_values(sql, args, fetch=False) + + else: + sql = "INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) DO %s" % ( + table, + ", ".join(k for k in allnames), + ", ".join("?" for _ in allnames), + ", ".join(key_names), + latter, + ) + + return txn.execute_batch(sql, args) @overload async def simple_select_one( From ded73ff165bd1535d6e544bd5d9ff3609b87b150 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Sep 2021 15:35:38 +0100 Subject: [PATCH 2/4] Newsfile --- changelog.d/10754.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10754.misc diff --git a/changelog.d/10754.misc b/changelog.d/10754.misc new file mode 100644 index 000000000000..3b7acff03f4f --- /dev/null +++ b/changelog.d/10754.misc @@ -0,0 +1 @@ +Minor speed ups when joining large rooms over federation. From 1ecc83923e0ce31b012f3c90e687c74780871129 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Sep 2021 16:02:41 +0100 Subject: [PATCH 3/4] Update synapse/storage/database.py Co-authored-by: Patrick Cloke --- synapse/storage/database.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index e432e998f597..6dfa9a6898bb 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -921,8 +921,8 @@ def simple_insert_many_txn( raise RuntimeError("All items must have the same keys") if isinstance(txn.database_engine, PostgresEngine): - # We use `execute_values` for postgres as it can be a lot faster - # than `execute_batch`, but it's only available on postgres. + # We use `execute_values` as it can be a lot faster than `execute_batch`, + # but it's only available on postgres. sql = "INSERT INTO %s (%s) VALUES ?" % ( table, ", ".join(k for k in keys[0]), From 0dee6127987e723fb4aad5a0f85e172914f34075 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Sep 2021 16:08:31 +0100 Subject: [PATCH 4/4] Fixup comment --- synapse/storage/database.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 6dfa9a6898bb..0084d9f96ccc 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1297,8 +1297,8 @@ def simple_upsert_many_txn_native_upsert( args.append(tuple(x) + tuple(y)) if isinstance(txn.database_engine, PostgresEngine): - # We use `execute_values` for postgres as it can be a lot faster - # than `execute_batch`, but it's only available on postgres. + # We use `execute_values` as it can be a lot faster than `execute_batch`, + # but it's only available on postgres. sql = "INSERT INTO %s (%s) VALUES ? ON CONFLICT (%s) DO %s" % ( table, ", ".join(k for k in allnames),