Skip to content

Commit

Permalink
Pass values into DedupeRule->sql() rather than hybrid object
Browse files Browse the repository at this point in the history
It doesn't really make sense for this query calculation function to be on a different class
I kinda get what they were thinking but the way our BAOs are half a collection of static
functions but occassionally get serious about being an object but then
like extra properties set without explanation does us no favours. In this
case it would be most sane if all the query calculation was on the same class
(probably a new one but for now just looking at one of the existing ones). To
do that we need to get clearer about the input the sql() function needs - this switches
it most of the way to being static-ready & move-ready by passing in the values as arrays
rather than setting them as properties
  • Loading branch information
eileenmcnaughton committed Feb 27, 2024
1 parent 9f58bc7 commit 39e34b7
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 54 deletions.
94 changes: 44 additions & 50 deletions CRM/Dedupe/BAO/DedupeRule.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,32 +21,26 @@
*/
class CRM_Dedupe_BAO_DedupeRule extends CRM_Dedupe_DAO_DedupeRule {

/**
* Ids of the contacts to limit the SQL queries (whole-database queries otherwise)
* @var array
*/
public $contactIds = [];

/**
* Params to dedupe against (queries against the whole contact set otherwise)
* @var array
*/
public $params = [];

/**
* Return the SQL query for the given rule - either for finding matching
* pairs of contacts, or for matching against the $params variable (if set).
*
* @param array|null $params
* Params to dedupe against (queries against the whole contact set otherwise)
* @param array $contactIDs
* Ids of the contacts to limit the SQL queries (whole-database queries otherwise)
* @param array $rule
*
* @return string
* SQL query performing the search
* or NULL if params is present and doesn't have and for a field.
*
* @throws \CRM_Core_Exception
*/
public function sql() {
if ($this->params &&
(!array_key_exists($this->rule_table, $this->params) ||
!array_key_exists($this->rule_field, $this->params[$this->rule_table])
public function sql($params, $contactIDs, array $rule): ?string {
if ($params &&
(!array_key_exists($rule['rule_table'], $params) ||
!array_key_exists($rule['rule_field'], $params[$rule['rule_table']])
)
) {
// if params is present and doesn't have an entry for a field, don't construct the clause.
Expand All @@ -58,29 +52,29 @@ public function sql() {
// $using are arrays of required field matchings (for substring and
// full matches, respectively)
$where = [];
$on = ["SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR(t2.{$this->rule_field}, 1, {$this->rule_length})"];
$on = ["SUBSTR(t1.{$rule['rule_field']}, 1, {$rule['rule_length']}) = SUBSTR(t2.{$rule['rule_field']}, 1, {$rule['rule_length']})"];

$innerJoinClauses = [
"t1.{$this->rule_field} IS NOT NULL",
"t2.{$this->rule_field} IS NOT NULL",
"t1.{$this->rule_field} = t2.{$this->rule_field}",
"t1.{$rule['rule_field']} IS NOT NULL",
"t2.{$rule['rule_field']} IS NOT NULL",
"t1.{$rule['rule_field']} = t2.{$rule['rule_field']}",
];

if (in_array($this->getFieldType($this->rule_field), CRM_Utils_Type::getTextTypes(), TRUE)) {
$innerJoinClauses[] = "t1.{$this->rule_field} <> ''";
$innerJoinClauses[] = "t2.{$this->rule_field} <> ''";
if (in_array($this->getFieldType($rule['rule_field']), CRM_Utils_Type::getTextTypes(), TRUE)) {
$innerJoinClauses[] = "t1.{$rule['rule_field']} <> ''";
$innerJoinClauses[] = "t2.{$rule['rule_field']} <> ''";
}

$cidRefs = CRM_Core_DAO::getReferencesToContactTable();
$eidRefs = CRM_Core_DAO::getDynamicReferencesToTable('civicrm_contact');

switch ($this->rule_table) {
switch ($rule['rule_table']) {
case 'civicrm_contact':
$id = 'id';
//we should restrict by contact type in the first step
$sql = "SELECT contact_type FROM civicrm_dedupe_rule_group WHERE id = {$this->dedupe_rule_group_id};";
$sql = "SELECT contact_type FROM civicrm_dedupe_rule_group WHERE id = {$rule['dedupe_rule_group_id']};";
$ct = CRM_Core_DAO::singleValueQuery($sql);
if ($this->params) {
if ($params) {
$where[] = "t1.contact_type = '{$ct}'";
}
else {
Expand All @@ -90,70 +84,70 @@ public function sql() {
break;

default:
if (array_key_exists($this->rule_table, $eidRefs)) {
$id = $eidRefs[$this->rule_table][0];
$entity_table = $eidRefs[$this->rule_table][1];
if ($this->params) {
if (array_key_exists($rule['rule_table'], $eidRefs)) {
$id = $eidRefs[$rule['rule_table']][0];
$entity_table = $eidRefs[$rule['rule_table']][1];
if ($params) {
$where[] = "t1.$entity_table = 'civicrm_contact'";
}
else {
$where[] = "t1.$entity_table = 'civicrm_contact'";
$where[] = "t2.$entity_table = 'civicrm_contact'";
}
}
elseif (array_key_exists($this->rule_table, $cidRefs)) {
$id = $cidRefs[$this->rule_table][0];
elseif (array_key_exists($rule['rule_table'], $cidRefs)) {
$id = $cidRefs[$rule['rule_table']][0];
}
else {
throw new CRM_Core_Exception("Unsupported rule_table for civicrm_dedupe_rule.id of {$this->id}");
throw new CRM_Core_Exception("Unsupported rule_table for civicrm_dedupe_rule.id of {$rule['id']}");
}
break;
}

// build SELECT based on the field names containing contact ids
// if there are params provided, id1 should be 0
if ($this->params) {
$select = "t1.$id id1, {$this->rule_weight} weight";
if ($params) {
$select = "t1.$id id1, {$rule['rule_weight']} weight";
$subSelect = 'id1, weight';
}
else {
$select = "t1.$id id1, t2.$id id2, {$this->rule_weight} weight";
$select = "t1.$id id1, t2.$id id2, {$rule['rule_weight']} weight";
$subSelect = 'id1, id2, weight';
}

// build FROM (and WHERE, if it's a parametrised search)
// based on whether the rule is about substrings or not
if ($this->params) {
$from = "{$this->rule_table} t1";
if ($params) {
$from = "{$rule['rule_table']} t1";
$str = 'NULL';
if (isset($this->params[$this->rule_table][$this->rule_field])) {
$str = trim(CRM_Utils_Type::escape($this->params[$this->rule_table][$this->rule_field], 'String'));
if (isset($params[$rule['rule_table']][$rule['rule_field']])) {
$str = trim(CRM_Utils_Type::escape($params[$rule['rule_table']][$rule['rule_field']], 'String'));
}
if ($this->rule_length) {
$where[] = "SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR('$str', 1, {$this->rule_length})";
$where[] = "t1.{$this->rule_field} IS NOT NULL";
if ($rule['rule_length']) {
$where[] = "SUBSTR(t1.{$rule['rule_field']}, 1, {$rule['rule_length']}) = SUBSTR('$str', 1, {$rule['rule_length']})";
$where[] = "t1.{$rule['rule_field']} IS NOT NULL";
}
else {
$where[] = "t1.{$this->rule_field} = '$str'";
$where[] = "t1.{$rule['rule_field']} = '$str'";
}
}
else {
if ($this->rule_length) {
$from = "{$this->rule_table} t1 JOIN {$this->rule_table} t2 ON (" . implode(' AND ', $on) . ")";
if ($rule['rule_length']) {
$from = "{$rule['rule_table']} t1 JOIN {$rule['rule_table']} t2 ON (" . implode(' AND ', $on) . ")";
}
else {
$from = "{$this->rule_table} t1 INNER JOIN {$this->rule_table} t2 ON (" . implode(' AND ', $innerJoinClauses) . ")";
$from = "{$rule['rule_table']} t1 INNER JOIN {$rule['rule_table']} t2 ON (" . implode(' AND ', $innerJoinClauses) . ")";
}
}

// finish building WHERE, also limit the results if requested
if (!$this->params) {
if (!$params) {
$where[] = "t1.$id < t2.$id";
}
$query = "SELECT $select FROM $from WHERE " . implode(' AND ', $where);
if ($this->contactIds) {
if ($contactIDs) {
$cids = [];
foreach ($this->contactIds as $cid) {
foreach ($contactIDs as $cid) {
$cids[] = CRM_Utils_Type::escape($cid, 'Integer');
}
if (count($cids) == 1) {
Expand Down
12 changes: 8 additions & 4 deletions CRM/Dedupe/BAO/DedupeRuleGroup.php
Original file line number Diff line number Diff line change
Expand Up @@ -244,11 +244,15 @@ private function tableQuery() {
// tailored to respect the param and contactId options provided.
$queries = [];
while ($bao->fetch()) {
$bao->contactIds = $this->contactIds;
$bao->params = $this->params;

// Skipping empty rules? Empty rules shouldn't exist; why check?
if ($query = $bao->sql()) {
if ($query = $bao->sql($this->params, $this->contactIds, [
'id' => (int) $bao->id,
'rule_table' => $bao->rule_table,
'rule_length' => $bao->rule_length,
'rule_field' => $bao->rule_field,
'rule_weight' => $bao->rule_weight,
'dedupe_rule_group_id' => $bao->dedupe_rule_group_id,
])) {
$queries["{$bao->rule_table}.{$bao->rule_field}.{$bao->rule_weight}"] = $query;
}
}
Expand Down

0 comments on commit 39e34b7

Please sign in to comment.