Skip to content

Commit

Permalink
Set up pants to use the latest version of humbug (pantsbuild#12302)
Browse files Browse the repository at this point in the history
Uses a new token for telemetry.

In the new setup, the journal ID no longer needs to be specified. Telemetry is also now reported to a new journal.

[ci skip-rust]

[ci skip-build-wheels]
  • Loading branch information
benjyw committed Jul 8, 2021
1 parent db0bf4c commit 86837f3
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 18 deletions.
2 changes: 1 addition & 1 deletion 3rdparty/python/constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ chardet==4.0.0
cryptography==3.4.7
fasteners==0.16
freezegun==1.1.0
humbug==0.1.9
humbug==0.2.6
idna==2.10
iniconfig==1.1.1
packaging==20.9
Expand Down
2 changes: 1 addition & 1 deletion 3rdparty/python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ freezegun==1.1.0

# Note: we use humbug to report telemetry. When upgrading, ensure the new version maintains the
# anonymity promise we make here: https://www.pantsbuild.org/docs/anonymous-telemetry
humbug==0.1.9
humbug==0.2.6

packaging==20.9
pex==2.1.42
Expand Down
23 changes: 10 additions & 13 deletions src/python/pants/goal/anonymous_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import uuid
from typing import cast

from humbug.consent import HumbugConsent # type: ignore
from humbug.report import Modes, Report, Reporter # type: ignore
from humbug.consent import HumbugConsent
from humbug.report import HumbugReporter, Modes, Report

from pants.engine.internals.scheduler import Workunit
from pants.engine.rules import collect_rules, rule
Expand All @@ -27,8 +27,7 @@
logger = logging.getLogger(__name__)


_bugout_access_token = "3ae76900-9a68-4a87-a127-7c9f179d7272"
_bugout_journal_id = "801e9b3c-6b03-40a7-870f-5b25d326da66"
_bugout_access_token = "974b1acc-e052-4e5f-a45e-bae928e47bb0"
_telemetry_docs_referral = f"See {doc_url('anonymous-telemetry')} for details"


Expand Down Expand Up @@ -113,29 +112,28 @@ def __call__(
)

if self._anonymous_telemetry.enabled:
repo_id = self._anonymous_telemetry.repo_id
if repo_id is None:
unhashed_repo_id = self._anonymous_telemetry.repo_id
if unhashed_repo_id is None:
logger.error(
f'Please set `repo_id = "<uuid>"` in the [anonymous-telemetry] section '
f"of pants.toml, where `<uuid>` is some fixed random identifier, such as "
f"one generated by uuidgen. No telemetry sent for this run. "
f"{_telemetry_docs_referral}."
)
elif self.validate_repo_id(repo_id):
elif self.validate_repo_id(unhashed_repo_id):
# Assemble and send the telemetry.
# Note that this method is called with finished=True only after the
# StreamingWorkunitHandler context ends, i.e., after end_run() has been called,
# so the RunTracker will have had a chance to finalize its state.
telemetry_data = context.run_tracker.get_anonymous_telemetry_data(repo_id)
telemetry_data = context.run_tracker.get_anonymous_telemetry_data(unhashed_repo_id)
# TODO: Add information about any errors that occurred.

reporter = Reporter(
reporter = HumbugReporter(
name="pantsbuild/pants",
# We've already established consent at this point.
consent=HumbugConsent(True),
session_id=telemetry_data.get("run_id", str(uuid.uuid4())),
session_id=str(telemetry_data.get("run_id", uuid.uuid4())),
bugout_token=_bugout_access_token,
bugout_journal_id=_bugout_journal_id,
timeout_seconds=5,
# We don't want to spawn a thread in the engine, and we're
# already running in a background thread in pantsd.
Expand All @@ -145,7 +143,6 @@ def __call__(
# This is copied from humbug code, to ensure that future changes to humbug
# don't add tags that inadvertently violate our anonymity promise.
system_tags = [
"humbug",
"source:{}".format(reporter.name),
"os:{}".format(reporter.system_information.os),
"arch:{}".format(reporter.system_information.machine),
Expand All @@ -161,7 +158,7 @@ def __call__(
system_tags
+ [
f"pants_version:{telemetry_data.get('pants_version')}",
# This is hashed, unlike the contents of the repo_id var.
# This is hashed, unlike the contents of the unhashed_repo_id var.
f"repo:{telemetry_data.get('repo_id', 'UNKNOWN')}",
f"user:{telemetry_data.get('user_id', 'UNKNOWN')}",
f"machine:{telemetry_data.get('machine_id', 'UNKNOWN')}",
Expand Down
6 changes: 3 additions & 3 deletions src/python/pants/goal/run_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def start(self, run_start_time: float, specs: List[str]) -> None:
}
)

def get_anonymous_telemetry_data(self, repo_id: str) -> dict[str, str | list[str]]:
def get_anonymous_telemetry_data(self, unhashed_repo_id: str) -> dict[str, str | list[str]]:
# TODO: Find a way to know from a goal name whether it's a standard or a custom
# goal whose name could, in theory, reveal something proprietary. That's more work than
# we want to do at the moment, so we maintain this manual list for now.
Expand All @@ -131,9 +131,9 @@ def get_anonymous_telemetry_data(self, repo_id: str) -> dict[str, str | list[str
}

def maybe_hash_with_repo_id_prefix(s: str) -> str:
qualified_str = f"{repo_id}.{s}" if s else repo_id
qualified_str = f"{unhashed_repo_id}.{s}" if s else unhashed_repo_id
# If the unhashed_repo_id is the empty string we return a blank string.
return sha256(qualified_str.encode()).hexdigest() if repo_id else ""
return sha256(qualified_str.encode()).hexdigest() if unhashed_repo_id else ""

return {
"run_id": str(self._run_info.get("id", uuid.uuid4())),
Expand Down

0 comments on commit 86837f3

Please sign in to comment.