Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix rule graph nondeterminism due to undetected ambiguity #7379

Merged
merged 6 commits into from
Mar 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/rust/engine/src/externs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

use std::ffi::OsStr;
use std::ffi::OsString;
use std::fmt;
use std::mem;
use std::os::raw;
use std::os::unix::ffi::{OsStrExt, OsStringExt};
Expand Down Expand Up @@ -486,6 +487,17 @@ pub struct Get {
pub subject: Key,
}

impl fmt::Display for Get {
fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
write!(
f,
"Get({}, {})",
type_to_str(self.product),
key_to_str(&self.subject)
)
}
}

pub enum GeneratorResponse {
Break(Value),
Get(Get),
Expand Down
104 changes: 54 additions & 50 deletions src/rust/engine/src/rule_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ pub enum Entry {
Singleton(Key, TypeId),
}

impl Entry {
fn params(&self) -> Vec<TypeId> {
match self {
&Entry::WithDeps(ref e) => e.params().iter().cloned().collect(),
&Entry::Param(ref type_id) => vec![*type_id],
&Entry::Singleton { .. } => vec![],
}
}
}

#[derive(Eq, Hash, PartialEq, Clone, Debug)]
pub struct RootEntry {
params: ParamTypes,
Expand Down Expand Up @@ -384,16 +394,16 @@ impl<'t> GraphMaker<'t> {
let dependency_keys = entry.dependency_keys();

for select_key in dependency_keys {
let (params, product) = match &select_key {
&SelectKey::JustSelect(ref s) => (entry.params().clone(), s.product),
let (params, product, provided_param) = match &select_key {
&SelectKey::JustSelect(ref s) => (entry.params().clone(), s.product, None),
&SelectKey::JustGet(ref g) => {
// Unlike Selects, Gets introduce new parameter values into a subgraph.
let get_params = {
let mut p = entry.params().clone();
p.insert(g.subject);
p
};
(get_params, g.product)
(get_params, g.product, Some(g.subject))
}
};

Expand All @@ -415,6 +425,15 @@ impl<'t> GraphMaker<'t> {
fulfillable_candidates.push(
simplified_entries
.into_iter()
.filter(|e| {
// Only entries that actually consume a provided (Get) parameter are eligible
// for consideration.
if let Some(pp) = provided_param {
e.params().contains(&pp)
} else {
true
}
})
.map(Entry::WithDeps)
.collect::<Vec<_>>(),
);
Expand Down Expand Up @@ -484,9 +503,6 @@ impl<'t> GraphMaker<'t> {
}

// No dependencies were completely unfulfillable (although some may have been cyclic).
//
// If this is an Aggregration, flatten the candidates by duplicating the SelectKey to treat
// each concrete rule as a group of candidates. Otherwise, flatten each group of candidates.
let flattened_fulfillable_candidates_by_key = fulfillable_candidates_by_key
.into_iter()
.map(|(k, candidate_group)| (k, Itertools::flatten(candidate_group.into_iter()).collect()))
Expand Down Expand Up @@ -558,8 +574,17 @@ impl<'t> GraphMaker<'t> {
let params_powerset: Vec<Vec<TypeId>> = {
let mut all_used_params = BTreeSet::new();
for (key, inputs) in deps {
let provided_param = match &key {
&SelectKey::JustSelect(_) => None,
&SelectKey::JustGet(ref g) => Some(&g.subject),
};
for input in inputs {
all_used_params.extend(Self::used_params(key, input));
all_used_params.extend(
input
.params()
.into_iter()
.filter(|p| Some(p) != provided_param),
);
}
}
// Compute the powerset ordered by ascending set size.
Expand Down Expand Up @@ -620,12 +645,17 @@ impl<'t> GraphMaker<'t> {
) -> Result<Option<Vec<(&'a SelectKey, &'a Entry)>>, Diagnostic> {
let mut combination = Vec::new();
for (key, input_entries) in deps {
let provided_param = match &key {
&SelectKey::JustSelect(_) => None,
&SelectKey::JustGet(ref g) => Some(&g.subject),
};
let satisfiable_entries = input_entries
.iter()
.filter(|input_entry| {
Self::used_params(key, input_entry)
input_entry
.params()
.iter()
.all(|p| available_params.contains(p))
.all(|p| available_params.contains(p) || Some(p) == provided_param)
})
.collect::<Vec<_>>();

Expand Down Expand Up @@ -668,51 +698,25 @@ impl<'t> GraphMaker<'t> {
}) {
vec![*param]
} else {
// Group by the simplified version of each rule: if exactly one, we're finished. We prefer
// the non-ambiguous rule with the smallest set of Params, as that minimizes Node identities
// in the graph and biases toward receiving values from dependencies (which do not affect our
// identity) rather than dependents.
let mut rules_by_kind: HashMap<EntryWithDeps, (usize, &Entry)> = HashMap::new();
for satisfiable_entry in &satisfiable_entries {
// We prefer the non-ambiguous rule with the smallest set of Params, as that minimizes Node
// identities in the graph and biases toward receiving values from dependencies (which do not
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/identifies/selections?

Copy link
Member Author

@stuhood stuhood Mar 14, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is "identities", which is accurate here... we refer to the Params that a @rule consumes as part of its "identity", because the same @rule can be used with multiple sets of Params, which might each (as seen in this ticket!) affect their behaviour.

// affect our identity) rather than dependents.
let mut minimum_param_set_size = ::std::usize::MAX;
let mut rules = Vec::new();
for satisfiable_entry in satisfiable_entries {
if let &Entry::WithDeps(ref wd) = satisfiable_entry {
rules_by_kind
.entry(wd.simplified(BTreeSet::new()))
.and_modify(|e| {
if e.0 > wd.params().len() {
*e = (wd.params().len(), satisfiable_entry);
}
})
.or_insert((wd.params().len(), satisfiable_entry));
let param_set_size = wd.params().len();
if param_set_size < minimum_param_set_size {
rules.clear();
rules.push(satisfiable_entry);
minimum_param_set_size = param_set_size;
} else if param_set_size == minimum_param_set_size {
rules.push(satisfiable_entry);
}
}
}

rules_by_kind
.into_iter()
.map(|(_, (_, rule))| rule)
.collect::<Vec<_>>()
}
}

///
/// Computes the parameters used by the given SelectKey and Entry.
///
fn used_params(key: &SelectKey, entry: &Entry) -> Vec<TypeId> {
// `Get`s introduce new Params to a subgraph, so using a Param provided by a Get does not
// count toward an Entry's used params.
let provided_param = match &key {
&SelectKey::JustSelect(_) => None,
&SelectKey::JustGet(ref g) => Some(&g.subject),
};

match &entry {
&Entry::WithDeps(ref e) => e
.params()
.iter()
.filter(|&type_id| Some(type_id) != provided_param)
.cloned()
.collect(),
&Entry::Param(ref type_id) if Some(type_id) != provided_param => vec![*type_id],
&Entry::Singleton { .. } | &Entry::Param(_) => vec![],
rules
}
}

Expand Down
5 changes: 4 additions & 1 deletion testprojects/tests/python/pants/dummies/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ python_tests(

python_library(
name = 'example_lib',
sources = 'example_source.py',
sources = [
'example_source.py',
'__init__.py',
],
)

python_tests(
Expand Down
Empty file.
5 changes: 3 additions & 2 deletions tests/python/pants_test/engine/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,10 @@ python_tests(
sources=['test_rules.py'],
coverage=['pants.engine.rules'],
dependencies=[
'3rdparty/python:future',
'3rdparty/python/twitter/commons:twitter.common.collections',
':util',
'3rdparty/python:future',
':scheduler_test_base',
':util',
'src/python/pants/engine:build_files',
'src/python/pants/engine:console',
'src/python/pants/engine:rules',
Expand All @@ -192,6 +192,7 @@ python_tests(
'tests/python/pants_test/engine/examples:parsers',
'tests/python/pants_test/subsystem:subsystem_utils',
'tests/python/pants_test/testutils:py2_compat',
'tests/python/pants_test:test_base',
]
)

Expand Down
52 changes: 45 additions & 7 deletions tests/python/pants_test/engine/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def test_creation_fails_with_bad_declaration_type(self):
RuleIndex.create([A()])


class RulesetValidatorTest(unittest.TestCase):
class RuleGraphTest(unittest.TestCase):
def test_ruleset_with_missing_product_type(self):
@rule(A, [Select(B)])
def a_from_b_noop(b):
Expand Down Expand Up @@ -240,12 +240,6 @@ def a_from_c(c):
""").strip(),
str(cm.exception))

assert_equal_with_printing = assert_equal_with_printing


class RuleGraphMakerTest(unittest.TestCase):
# TODO HasProducts?

def test_smallest_full_test(self):
@rule(A, [Select(SubA)])
def a_from_suba(suba):
Expand Down Expand Up @@ -385,6 +379,50 @@ def b():
}""").strip(),
subgraph)

def test_potentially_ambiguous_get(self):
# In this case, we validate that a Get is satisfied by a rule that actually consumes its
# parameter, rather than by having the same dependency rule consume a parameter that was
# already in the context.
#
# This accounts for the fact that when someone uses Get (rather than Select), it's because
# they intend for the Get's parameter to be consumed in the subgraph. Anything else would
# be surprising.
@rule(A, [Select(SubA)])
def a(sub_a):
_ = yield Get(B, C()) # noqa: F841

@rule(B, [Select(SubA)])
def b_from_suba(suba):
pass

@rule(SubA, [Select(C)])
def suba_from_c(c):
pass

rules = [
a,
b_from_suba,
suba_from_c,
]

subgraph = self.create_subgraph(A, rules, SubA())
self.assert_equal_with_printing(
dedent("""
digraph {
// root subject types: SubA
// root entries
"Select(A) for SubA" [color=blue]
"Select(A) for SubA" -> {"(A, [Select(SubA)], [Get(B, C)], a()) for SubA"}
// internal entries
"(A, [Select(SubA)], [Get(B, C)], a()) for SubA" -> {"(B, [Select(SubA)], b_from_suba()) for C" "Param(SubA)"}
"(B, [Select(SubA)], b_from_suba()) for C" -> {"(SubA, [Select(C)], suba_from_c()) for C"}
"(B, [Select(SubA)], b_from_suba()) for SubA" -> {"Param(SubA)"}
"(SubA, [Select(C)], suba_from_c()) for C" -> {"Param(C)"}
}
""").strip(),
subgraph,
)

def test_one_level_of_recursion(self):
@rule(A, [Select(B)])
def a_from_b(b):
Expand Down
Loading