diff --git a/CRM/Core/BAO/SchemaHandler.php b/CRM/Core/BAO/SchemaHandler.php
index a58dd8c4821d..03f39c2decd6 100644
--- a/CRM/Core/BAO/SchemaHandler.php
+++ b/CRM/Core/BAO/SchemaHandler.php
@@ -906,6 +906,59 @@ public static function getInUseCollation(): string {
     return \Civi::$statics[__CLASS__][__FUNCTION__];
   }
 
+  /**
+   * Get estimated number of rows in the given tables.
+   *
+   * Note that this query is less precise than SELECT COUNT(*) - especially on
+   * larger tables - but performs significantly better.
+   * See https://dba.stackexchange.com/questions/184685/why-is-count-slow-when-explain-knows-the-answer
+   *
+   * Counts that have been looked up are kept in \Civi::$statics and reused by
+   * later calls. Tables that are not found in the current database are left
+   * out of the result.
+   *
+   * @param array $tables
+   *   e.g. ['civicrm_contact', 'civicrm_activity']
+   *
+   * @return array
+   *   e.g. ['civicrm_contact' => 200000, 'civicrm_activity' => 100000]
+   */
+  public static function getRowCountForTables(array $tables): array {
+    $cachedResults = \Civi::$statics[__CLASS__][__FUNCTION__] ?? [];
+    // Compile list of tables not already cached.
+    $tablesToCheck = array_keys(array_diff_key(array_flip($tables), $cachedResults));
+    // Only query when there is something to look up - otherwise the statement
+    // would run with an empty IN("") list and could never return a row.
+    if ($tablesToCheck) {
+      // The names are concatenated into the SQL, so escape each one first.
+      $tableList = implode('","', array_map(['CRM_Core_DAO', 'escapeString'], $tablesToCheck));
+      $result = CRM_Core_DAO::executeQuery('
+        SELECT TABLE_ROWS as row_count, TABLE_NAME as table_name FROM information_schema.TABLES WHERE
+        TABLE_NAME IN("' . $tableList . '")
+        AND TABLE_SCHEMA = DATABASE()'
+      );
+      while ($result->fetch()) {
+        $cachedResults[$result->table_name] = (int) $result->row_count;
+      }
+      \Civi::$statics[__CLASS__][__FUNCTION__] = $cachedResults;
+    }
+    return array_intersect_key($cachedResults, array_fill_keys($tables, TRUE));
+  }
+
+  /**
+   * Get estimated number of rows in the given table.
+   *
+   * @see self::getRowCountForTables
+   *
+   * @param string $tableName
+   *
+   * @return int
+   *   The approximate number of rows in the table. This is also 0 if the table does not exist.
+   */
+  public static function getRowCountForTable(string $tableName): int {
+    return self::getRowCountForTables([$tableName])[$tableName] ?? 0;
+  }
+
   /**
    * Does the database support utf8mb4.
* diff --git a/CRM/Core/DAO.php b/CRM/Core/DAO.php index eb84c1651ca9..89b57a76c390 100644 --- a/CRM/Core/DAO.php +++ b/CRM/Core/DAO.php @@ -1100,7 +1100,7 @@ public static function checkFieldExists($tableName, $columnName, $i18nRewrite = } /** - * Scans all the tables using a slow query and table name. + * Gets the names of all the tables in the schema. * * @return array */ diff --git a/CRM/Dedupe/BAO/DedupeRuleGroup.php b/CRM/Dedupe/BAO/DedupeRuleGroup.php index 5e6f1d80b7ff..dc25dc02b4ba 100644 --- a/CRM/Dedupe/BAO/DedupeRuleGroup.php +++ b/CRM/Dedupe/BAO/DedupeRuleGroup.php @@ -263,7 +263,7 @@ public function fillTable() { $query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)"; $dao = CRM_Core_DAO::executeQuery($query); - // FIXME: we need to be more acurate with affected rows, especially for insert vs duplicate insert. + // FIXME: we need to be more accurate with affected rows, especially for insert vs duplicate insert. // And that will help optimize further. $affectedRows = $dao->affectedRows(); @@ -337,28 +337,31 @@ public static function isQuerySetInclusive($tableQueries, $threshold, $exclWeigh } /** - * sort queries by number of records for the table associated with them. - * @param $tableQueries + * Sort queries by number of records for the table associated with them. 
+   *
+   * @param array $tableQueries
    */
-  public static function orderByTableCount(&$tableQueries) {
-    static $tableCount = [];
-
-    $tempArray = [];
-    foreach ($tableQueries as $key => $query) {
-      $table = explode(".", $key);
-      $table = $table[0];
-      if (!array_key_exists($table, $tableCount)) {
-        $query = "SELECT COUNT(*) FROM {$table}";
-        $tableCount[$table] = CRM_Core_DAO::singleValueQuery($query);
-      }
-      $tempArray[$key] = $tableCount[$table];
-    }
+  public static function orderByTableCount(array &$tableQueries): void {
+    uksort($tableQueries, [self::class, 'isTableBigger']);
+  }
 
-    asort($tempArray);
-    foreach ($tempArray as $key => $count) {
-      $tempArray[$key] = $tableQueries[$key];
+  /**
+   * uksort() comparator: order two table.field keys by the estimated row count of their tables.
+   *
+   * @param string $a
+   *   Key in table.field form, e.g. civicrm_contact.first_name
+   * @param string $b
+   *   Key in table.field form, e.g. civicrm_address.street_address
+   *
+   * @return int
+   */
+  private static function isTableBigger(string $a, string $b): int {
+    $tableA = explode('.', $a)[0];
+    $tableB = explode('.', $b)[0];
+    if ($tableA === $tableB) {
+      return 0;
     }
-    $tableQueries = $tempArray;
+    return CRM_Core_BAO_SchemaHandler::getRowCountForTable($tableA) <=> CRM_Core_BAO_SchemaHandler::getRowCountForTable($tableB);
   }
 
   /**
diff --git a/tests/phpunit/CRM/Core/BAO/SchemaHandlerTest.php b/tests/phpunit/CRM/Core/BAO/SchemaHandlerTest.php
index ed28228217aa..7970bb786134 100644
--- a/tests/phpunit/CRM/Core/BAO/SchemaHandlerTest.php
+++ b/tests/phpunit/CRM/Core/BAO/SchemaHandlerTest.php
@@ -35,7 +35,7 @@ public function tearDown(): void {
    * We want to be sure it creates an index and exits gracefully if the index
    * already exists.
*/ - public function testCreateIndex() { + public function testCreateIndex(): void { $tables = ['civicrm_uf_join' => ['weight']]; CRM_Core_BAO_SchemaHandler::createIndexes($tables); CRM_Core_BAO_SchemaHandler::createIndexes($tables); @@ -54,9 +54,9 @@ public function testCreateIndex() { /** * Test CRM_Core_BAO_SchemaHandler::getIndexes() function */ - public function testGetIndexes() { + public function testGetIndexes(): void { $indexes = CRM_Core_BAO_SchemaHandler::getIndexes(['civicrm_contact']); - $this->assertTrue(array_key_exists('index_contact_type', $indexes['civicrm_contact'])); + $this->assertArrayHasKey('index_contact_type', $indexes['civicrm_contact']); } /** @@ -94,28 +94,28 @@ public function testCombinedIndex() { /** * Test the drop index if exists function for a non-existent index. */ - public function testCheckIndexNotExists() { + public function testCheckIndexNotExists(): void { $this->assertFalse(CRM_Core_BAO_SchemaHandler::checkIfIndexExists('civicrm_contact', 'magic_button')); } /** * Test the drop index if exists function for a non-existent index. */ - public function testCheckIndexExists() { + public function testCheckIndexExists(): void { $this->assertTrue(CRM_Core_BAO_SchemaHandler::checkIfIndexExists('civicrm_contact', 'index_hash')); } /** * Test the drop index if exists function for a non-existent index. */ - public function testDropIndexNoneExists() { + public function testDropIndexNoneExists(): void { CRM_Core_BAO_SchemaHandler::dropIndexIfExists('civicrm_contact', 'magic_button'); } /** * Test the drop index if exists function. 
*/ - public function testDropIndexExists() { + public function testDropIndexExists(): void { CRM_Core_BAO_SchemaHandler::dropIndexIfExists('civicrm_contact', 'index_hash'); $this->assertFalse(CRM_Core_BAO_SchemaHandler::checkIfIndexExists('civicrm_contact', 'index_hash')); @@ -134,6 +134,18 @@ public function columnTests() { return $columns; } + /** + * Test getting the estimated row counts for one or more tables. + */ + public function testGetRowCountForTable(): void { + $this->assertEquals([ + 'civicrm_worldregion' => 6, + 'civicrm_acl' => 0, + 'civicrm_domain' => 2, + ], CRM_Core_BAO_SchemaHandler::getRowCountForTables(['civicrm_domain', 'civicrm_acl', 'random_name', 'civicrm_worldregion'])); + $this->assertEquals(2, CRM_Core_BAO_SchemaHandler::getRowCountForTable('civicrm_domain')); + } + /** * @param $tableName * @param $columnName diff --git a/tests/phpunit/CRM/Dedupe/DedupeFinderTest.php b/tests/phpunit/CRM/Dedupe/DedupeFinderTest.php index c61a2e763f0d..668a992bb6a9 100644 --- a/tests/phpunit/CRM/Dedupe/DedupeFinderTest.php +++ b/tests/phpunit/CRM/Dedupe/DedupeFinderTest.php @@ -42,7 +42,7 @@ public function tearDown(): void { /** * Test the unsupervised dedupe rule against a group. * - * @throws \Exception + * @throws \CRM_Core_Exception */ public function testUnsupervisedDupes(): void { // make dupe checks based on following contact sets: