From 536660990a30485ae64de7581b136f9ca528d5f0 Mon Sep 17 00:00:00 2001 From: eileen Date: Wed, 16 Jan 2019 13:50:14 +1300 Subject: [PATCH 1/2] Fix bug in Annual summary query (on contact dashboard) whereby a left join gives the wrong results As demonstrated in the test the left join does not do any filtering but it DOES join in as many line items as match the permitted types - resulting in too many rows & the sum calculated on them being incorrect. As this doesn't work it should be removed - I would aniticpate that sites with the financialreportacls extensions would get this adequately added through CRM_Financial_BAO_FinancialType::addACLClausesToWhereClauses but regardless this should go as it causes a bug without achieving it's intended result and adding a test prevents a later fix from re-breaking --- CRM/Contribute/BAO/Contribution.php | 10 +-- .../CRM/Contribute/BAO/ContributionTest.php | 18 ++++++ .../CRMTraits/Financial/PriceSetTrait.php | 61 +++++++++++++++++++ 3 files changed, 80 insertions(+), 9 deletions(-) create mode 100644 tests/phpunit/CRMTraits/Financial/PriceSetTrait.php diff --git a/CRM/Contribute/BAO/Contribution.php b/CRM/Contribute/BAO/Contribution.php index 21d40d568a5a..7cf97b715cfd 100644 --- a/CRM/Contribute/BAO/Contribution.php +++ b/CRM/Contribute/BAO/Contribution.php @@ -5581,14 +5581,7 @@ public static function getAnnualQuery($contactIDs) { } $startDate = "$year$monthDay"; $endDate = "$nextYear$monthDay"; - $financialTypes = []; - CRM_Financial_BAO_FinancialType::getAvailableFinancialTypes($financialTypes); - // this is a clumsy way of saying never return anything - // @todo improve! - $liWhere = " AND i.financial_type_id IN (0)"; - if (!empty($financialTypes)) { - $liWhere = " AND i.financial_type_id NOT IN (" . implode(',', array_keys($financialTypes)) . ")"; - } + $whereClauses = [ 'contact_id' => 'IN (' . $contactIDs . ')', 'contribution_status_id' => '= ' . (int) CRM_Core_PseudoConstant::getKey('CRM_Contribute_BAO_Contribution', 'contribution_status_id', 'Completed'), @@ -5609,7 +5602,6 @@ public static function getAnnualQuery($contactIDs) { AVG(total_amount) as average, currency FROM civicrm_contribution b - LEFT JOIN civicrm_line_item i ON i.contribution_id = b.id AND i.entity_table = 'civicrm_contribution' $liWhere WHERE " . $whereClauseString . " GROUP BY currency "; diff --git a/tests/phpunit/CRM/Contribute/BAO/ContributionTest.php b/tests/phpunit/CRM/Contribute/BAO/ContributionTest.php index 21ac309634ae..16a9d0eb4781 100644 --- a/tests/phpunit/CRM/Contribute/BAO/ContributionTest.php +++ b/tests/phpunit/CRM/Contribute/BAO/ContributionTest.php @@ -32,6 +32,7 @@ class CRM_Contribute_BAO_ContributionTest extends CiviUnitTestCase { use CRMTraits_Financial_FinancialACLTrait; + use CRMTraits_Financial_PriceSetTrait; /** * Clean up after tests. @@ -324,6 +325,23 @@ public function testAnnualQueryWithFinancialACLsEnabled() { $this->disableFinancialACLs(); } + /** + * Test the annual query returns a correct result when multiple line items are present. + */ + public function testAnnualWithMultipleLineItems() { + $contactID = $this->createLoggedInUserWithFinancialACL(); + $this->createContributionWithTwoLineItemsAgainstPriceSet([ + 'contact_id' => $contactID] + ); + $this->enableFinancialACLs(); + $sql = CRM_Contribute_BAO_Contribution::getAnnualQuery([$contactID]); + $result = CRM_Core_DAO::executeQuery($sql); + $result->fetch(); + $this->assertEquals(300, $result->amount); + $this->assertEquals(1, $result->count); + $this->disableFinancialACLs(); + } + /** * Test that financial type data is not added to the annual query if acls not enabled. */ diff --git a/tests/phpunit/CRMTraits/Financial/PriceSetTrait.php b/tests/phpunit/CRMTraits/Financial/PriceSetTrait.php new file mode 100644 index 000000000000..48d102066960 --- /dev/null +++ b/tests/phpunit/CRMTraits/Financial/PriceSetTrait.php @@ -0,0 +1,61 @@ + 300, 'financial_type_id' => 'Donation'], $params); + $priceFields = $this->createPriceSet('contribution'); + foreach ($priceFields['values'] as $key => $priceField) { + $params['line_items'][]['line_item'][$key] = [ + 'price_field_id' => $priceField['price_field_id'], + 'price_field_value_id' => $priceField['id'], + 'label' => $priceField['label'], + 'field_title' => $priceField['label'], + 'qty' => 1, + 'unit_price' => $priceField['amount'], + 'line_total' => $priceField['amount'], + 'financial_type_id' => $priceField['financial_type_id'], + 'entity_table' => 'civicrm_contribution', + ]; + } + $this->callAPISuccess('order', 'create', $params); + } + +} From 0c54553ffe5fc5b4776a5292987055d46430f7a5 Mon Sep 17 00:00:00 2001 From: eileen Date: Wed, 30 Jan 2019 12:14:22 +1300 Subject: [PATCH 2/2] Alter the query used to render the yearly totals on the contact contribution tab to make a better index choice. This PR restructures the annual query to use HAVING clause for contribution_status_id in order to avoid the (edge case) situation where it uses a combined index resulting in a slow query. When a contribution is entered the screen loaded on-close is the contribution tab so when this is slow the data entry process is much slowed. Mmsql index merge optimisation is a default enabled setting which can choose to use 'merged indexes' rather than just one. In this query the best index is contact_id because even when a contact has a relatively large number of contributions they are still trivial compared to the overall DB. By contrast contribution_status_id is not a good choice as it has fairly low variability and when used in combination with contact_id adds very little as trade off for the time it takes to construct a merge index What we are seeing is that the index choice on this query is index_contribution_status,received_date,FK_civicrm_contribution_contact_id,contact_id_contribution_status | FK_civicrm_contribution_contact_id,index_contribution_status which results in a query that takes around 6 seconds on a contact with 18k contributions in a DB of around 30 million contributions. If we restructure the query so that it looks like ``` SELECT COUNT(*) as count, SUM(total_amount) as amount, AVG(total_amount) as average, currency FROM civicrm_contribution b WHERE b.contact_id IN (72) AND b.receive_date >= 20180701 AND b.receive_date < 20190701 GROUP BY currency, contribution_status_id HAVING contribution_status_id = 1; ``` the selected index is FK_civicrm_contribution_contact_id and the query takes .05 seconds. This restructure achieves much the same as using an index hint but as there is some index naming inconsistency 'in the wild' changing the query structure like this makes more sense. Note that I ALSO tested this 1) on the same database on a contact with a negligible number of contributions 2) on a much smaller database (~400k contributions) on a contact with 122 contributions. In both cases it made no difference to which key was chosen as the merge index appears to only be chosen by mysql when more than a certain number of on contributions adhere to a single contact. In the latter case the query time for both versions was recorded as 0.00 so I couldn't see any difference. In the former case the query change sped up the query from 0.02 to 0.00 so there is some additional efficiency in the new query beyond the index choice Links on index merge optimisation https://www.percona.com/blog/2012/12/14/the-optimization-that-often-isnt-index-merge-intersection/ http://gobitcan.com/blog/2014-12-03-what-to-do-when-mysql-ignores-your-index http://mysqlopt.blogspot.com/2013/05/mysql-slow-query-example-of-index-merge.html https://www.percona.com/blog/2012/12/14/the-optimization-that-often-isnt-index-merge-intersection/ https://stackoverflow.com/questions/16283472/why-is-mysql-showing-index-merge-on-this-query https://mariadb.com/kb/en/library/fair-choice-between-range-and-index_merge-optimizations/ https://dba.stackexchange.com/questions/178286/mysql-on-rds-avoiding-index-merge https://mariadb.com/kb/en/library/index_merge-sort_intersection/ https://stackoverflow.com/questions/47943755/mysql-optimizing-subquery-with-index-merge https://dev.mysql.com/doc/refman/8.0/en/index-merge-optimization.html --- CRM/Contribute/BAO/Contribution.php | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CRM/Contribute/BAO/Contribution.php b/CRM/Contribute/BAO/Contribution.php index 7cf97b715cfd..4c4bd27b44d7 100644 --- a/CRM/Contribute/BAO/Contribution.php +++ b/CRM/Contribute/BAO/Contribution.php @@ -5584,10 +5584,10 @@ public static function getAnnualQuery($contactIDs) { $whereClauses = [ 'contact_id' => 'IN (' . $contactIDs . ')', - 'contribution_status_id' => '= ' . (int) CRM_Core_PseudoConstant::getKey('CRM_Contribute_BAO_Contribution', 'contribution_status_id', 'Completed'), 'is_test' => ' = 0', 'receive_date' => ['>=' . $startDate, '< ' . $endDate], ]; + $havingClause = 'contribution_status_id = ' . (int) CRM_Core_PseudoConstant::getKey('CRM_Contribute_BAO_Contribution', 'contribution_status_id', 'Completed'); CRM_Financial_BAO_FinancialType::addACLClausesToWhereClauses($whereClauses); $clauses = []; @@ -5596,6 +5596,8 @@ public static function getAnnualQuery($contactIDs) { } $whereClauseString = implode(' AND ', $clauses); + // See https://github.com/civicrm/civicrm-core/pull/13512 for discussion of how + // this group by + having on contribution_status_id improves performance $query = " SELECT COUNT(*) as count, SUM(total_amount) as amount, @@ -5603,7 +5605,8 @@ public static function getAnnualQuery($contactIDs) { currency FROM civicrm_contribution b WHERE " . $whereClauseString . " - GROUP BY currency + GROUP BY currency, contribution_status_id + HAVING $havingClause "; return $query; }