Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(profiling): default to libdatadog if autoinject/SSI is enabled #8822

Merged
merged 23 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c82c549
Add preliminary support for auto-instrument in profiling
sanchda Apr 1, 2024
1ccdca8
Suppress more imports
sanchda Apr 1, 2024
e56b475
Merge branch 'main' into sanchda/permissive_protobuf
sanchda Apr 1, 2024
d08aa1e
Merge branch 'main' into sanchda/permissive_protobuf
sanchda Apr 1, 2024
b4b20ab
Style
sanchda Apr 2, 2024
5faf29f
Fix typo
sanchda Apr 2, 2024
9cfb4cc
More fixups
sanchda Apr 2, 2024
5eaf6cb
Merge branch 'main' into sanchda/permissive_protobuf
sanchda Apr 2, 2024
31860fc
Fix wrong config
sanchda Apr 2, 2024
38214b8
Merge branch 'sanchda/permissive_protobuf' of github.com:DataDog/dd-t…
sanchda Apr 2, 2024
f671a35
Make types safe again
sanchda Apr 2, 2024
c6b69ad
Fix config whoopsie
sanchda Apr 2, 2024
970e6f9
Fix import
sanchda Apr 2, 2024
2baa9d4
Merge branch 'main' into sanchda/permissive_protobuf
sanchda Apr 2, 2024
3670665
Update releasenotes/notes/profiling-fix-protobuf-injection-d94f7c1526…
sanchda Apr 2, 2024
d2acd08
Update ddtrace/settings/injection.py
sanchda Apr 2, 2024
0ea2159
Merge branch 'main' into sanchda/permissive_protobuf
sanchda Apr 2, 2024
4ef8cbf
Replace DD_INJECTION_ENABLED with a direct requirement on profiling
sanchda Apr 2, 2024
adba36d
Best effort to cleanup protobuf stuff
sanchda Apr 2, 2024
7dc51f6
lint
emmettbutler Apr 2, 2024
69ca350
Merge branch 'main' into sanchda/permissive_protobuf
emmettbutler Apr 2, 2024
dcb8025
Merge branch 'main' into sanchda/permissive_protobuf
emmettbutler Apr 2, 2024
c6425e5
lint
emmettbutler Apr 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions ddtrace/profiling/collector/memalloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,6 @@ def _start_service(self):
if _memalloc is None:
raise collector.CollectorUnavailable

if self._export_libdd_enabled and not ddup.is_available:
self._export_libdd_enabled = False

try:
_memalloc.start(self.max_nframe, self._max_events, self.heap_sample_size)
except RuntimeError:
Expand Down
19 changes: 1 addition & 18 deletions ddtrace/profiling/collector/stack.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -487,24 +487,7 @@ class StackCollector(collector.PeriodicCollector):

# If libdd is enabled, propagate the configuration
if config.export.libdd_enabled:
if not ddup.is_available:
# We don't report on this, since it's already been reported in profiler.py
set_use_libdd(False)

# If the user had also set stack.v2.enabled, then we need to disable that as well.
if self._stack_collector_v2_enabled:
self._stack_collector_v2_enabled = False
LOG.error("Stack v2 was requested, but the libdd collector could not be enabled. Falling back to the v1 stack sampler.")
else:
set_use_libdd(True)

# If stack v2 is requested, verify it is loaded properly and that libdd has been enabled.
if self._stack_collector_v2_enabled:
if not stack_v2.is_available:
self._stack_collector_v2_enabled = False
LOG.error("Stack v2 was requested, but it could not be enabled. Check debug logs for more information.")
if not use_libdd:
self._stack_collector_v2_enabled = False
set_use_libdd(True)

# If at the end of things, stack v2 is still enabled, then start the native thread running the v2 sampler
if self._stack_collector_v2_enabled:
Expand Down
102 changes: 62 additions & 40 deletions ddtrace/profiling/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,20 +128,23 @@ class _ProfilerInstance(service.Service):
init=False, factory=lambda: os.environ.get("AWS_LAMBDA_FUNCTION_NAME"), type=Optional[str]
)
_export_libdd_enabled = attr.ib(type=bool, default=config.export.libdd_enabled)
_export_libdd_required = attr.ib(type=bool, default=config.export.libdd_required)

ENDPOINT_TEMPLATE = "https://intake.profile.{}"

def _build_default_exporters(self):
# type: (...) -> List[exporter.Exporter]
_OUTPUT_PPROF = config.output_pprof
if _OUTPUT_PPROF:
# DEV: Import this only if needed to avoid importing protobuf
# unnecessarily
from ddtrace.profiling.exporter import file

return [
file.PprofFileExporter(prefix=_OUTPUT_PPROF),
]
if not self._export_libdd_enabled:
# If libdatadog support is enabled, we can skip this part
_OUTPUT_PPROF = config.output_pprof
if _OUTPUT_PPROF:
# DEV: Import this only if needed to avoid importing protobuf
# unnecessarily
from ddtrace.profiling.exporter import file

return [
file.PprofFileExporter(prefix=_OUTPUT_PPROF),
]

if self.url is not None:
endpoint = self.url
Expand All @@ -168,13 +171,11 @@ def _build_default_exporters(self):
if self._lambda_function_name is not None:
self.tags.update({"functionname": self._lambda_function_name})

# It's possible to fail to load the libdatadog collector, so we check. Any other consumers
# of libdd can do their own check, so we just log.
if self._export_libdd_enabled and not ddup.is_available:
LOG.error("Failed to load the libdd collector, falling back to the legacy collector")
self._export_libdd_enabled = False
elif self._export_libdd_enabled:
LOG.debug("Using the libdd collector")
# Did the user request the libdd collector? Better log it.
if self._export_libdd_enabled:
LOG.debug("The libdd collector is enabled")
if self._export_libdd_required:
LOG.debug("The libdd collector is required")

# Build the list of enabled Profiling features and send along as a tag
configured_features = []
Expand All @@ -194,6 +195,8 @@ def _build_default_exporters(self):
configured_features.append("exp_dd")
else:
configured_features.append("exp_py")
if self._export_libdd_required:
configured_features.append("req_dd")
configured_features.append("CAP" + str(config.capture_pct))
configured_features.append("MAXF" + str(config.max_frames))
self.tags.update({"profiler_config": "_".join(configured_features)})
Expand All @@ -202,34 +205,53 @@ def _build_default_exporters(self):
if self.endpoint_collection_enabled:
endpoint_call_counter_span_processor.enable()

# If libdd is enabled, then
# * If initialization fails, disable the libdd collector and fall back to the legacy exporter
# * If initialization fails and libdd is required, disable everything and return (error)
if self._export_libdd_enabled:
ddup.init(
env=self.env,
service=self.service,
version=self.version,
tags=self.tags, # type: ignore
max_nframes=config.max_frames,
url=endpoint,
)
else:
# DEV: Import this only if needed to avoid importing protobuf
# unnecessarily
from ddtrace.profiling.exporter import http

return [
http.PprofHTTPExporter(
service=self.service,
try:
ddup.init(
env=self.env,
tags=self.tags,
service=self.service,
version=self.version,
api_key=self.api_key,
endpoint=endpoint,
endpoint_path=endpoint_path,
enable_code_provenance=self.enable_code_provenance,
endpoint_call_counter_span_processor=endpoint_call_counter_span_processor,
tags=self.tags, # type: ignore
max_nframes=config.max_frames,
url=endpoint,
)
]
return []
return []
except Exception as e:
LOG.error("Failed to initialize libdd collector (%s), falling back to the legacy collector", e)
self._export_libdd_enabled = False
config.export.libdd_enabled = False

# If we're here and libdd was required, then there's nothing else to do. We don't have a
# collector.
if self._export_libdd_required:
LOG.error("libdd collector is required but could not be initialized. Disabling profiling.")
config.enabled = False
config.export.libdd_required = False
config.lock.enabled = False
config.memory.enabled = False
config.stack.enabled = False
return []

# DEV: Import this only if needed to avoid importing protobuf
# unnecessarily
from ddtrace.profiling.exporter import http

return [
http.PprofHTTPExporter(
service=self.service,
env=self.env,
tags=self.tags,
version=self.version,
api_key=self.api_key,
endpoint=endpoint,
endpoint_path=endpoint_path,
enable_code_provenance=self.enable_code_provenance,
endpoint_call_counter_span_processor=endpoint_call_counter_span_processor,
)
]

def __attrs_post_init__(self):
# type: (...) -> None
Expand Down
2 changes: 0 additions & 2 deletions ddtrace/profiling/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ def __attrs_post_init__(self):
def _start_service(self):
# type: (...) -> None
"""Start the scheduler."""
if self._export_libdd_enabled and not ddup.is_available:
self._export_libdd_enabled = False
LOG.debug("Starting scheduler")
super(Scheduler, self)._start_service()
self._last_export = compat.time_ns()
Expand Down
62 changes: 59 additions & 3 deletions ddtrace/settings/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,37 @@ def _derive_default_heap_sample_size(heap_config, default_heap_sample_size=1024
return int(max(math.ceil(total_mem / max_samples), default_heap_sample_size))


def _check_for_ddup_available():
ddup_is_available = False
try:
from ddtrace.internal.datadog.profiling import ddup

ddup_is_available = ddup.is_available
except Exception:
pass # nosec
return ddup_is_available


def _check_for_stack_v2_available():
stack_v2_is_available = False
if not _check_for_ddup_available():
return False

try:
from ddtrace.internal.datadog.profiling import stack_v2

stack_v2_is_available = stack_v2.is_available
except Exception:
pass # nosec
return stack_v2_is_available


# We don't check for the availability of the ddup module when determining whether libdd is _required_,
# since it's up to the application code to determine what happens in that failure case.
def _is_libdd_required(config):
return config.stack.v2.enabled or config._libdd_required


class ProfilingConfig(En):
__prefix__ = "dd.profiling"

Expand Down Expand Up @@ -162,14 +193,16 @@ class Stack(En):
class V2(En):
__item__ = __prefix__ = "v2"

enabled = En.v(
_enabled = En.v(
bool,
"enabled",
default=False,
help_type="Boolean",
help="Whether to enable the v2 stack profiler. Also enables the libdatadog collector.",
)

enabled = En.d(bool, lambda c: _check_for_stack_v2_available() and c._enabled)

class Lock(En):
__item__ = __prefix__ = "lock"

Expand Down Expand Up @@ -223,13 +256,36 @@ class Heap(En):
class Export(En):
__item__ = __prefix__ = "export"

libdd_enabled = En.v(
_libdd_required = En.v(
bool,
"libdd_required",
default=False,
help_type="Boolean",
help="Requires the native exporter to be enabled",
)

libdd_required = En.d(
bool,
_is_libdd_required,
)

_libdd_enabled = En.v(
bool,
"libdd_enabled",
default=False,
help_type="Boolean",
help="Enables collection and export using the experimental exporter",
help="Enables collection and export using a native exporter. Can fallback to the pure-Python exporter.",
)

libdd_enabled = En.d(
bool, lambda c: (_is_libdd_required(c) or c._libdd_enabled) and _check_for_ddup_available()
)

Export.include(Stack, namespace="stack")


config = ProfilingConfig()

if config.export.libdd_required and not config.export.libdd_enabled:
logger.warning("The native exporter is required, but not enabled. Disabling profiling.")
config.enabled = False
15 changes: 15 additions & 0 deletions lib-injection/dl_wheels.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import argparse
import itertools
import os
from pathlib import Path
import shutil
import subprocess
import sys

Expand Down Expand Up @@ -122,3 +124,16 @@
)
# Remove the wheel as it has been unpacked
os.remove(wheel_file)

sitepackages_root = Path(dl_dir) / f"site-packages-ddtrace-py{python_version}-{platform}"
directories_to_remove = [
sitepackages_root / "google" / "protobuf",
sitepackages_root / "google" / "_upb",
]
directories_to_remove.extend(sitepackages_root.glob("protobuf-*")) # dist-info directories

for directory in directories_to_remove:
try:
shutil.rmtree(directory)
except Exception:
pass
3 changes: 3 additions & 0 deletions lib-injection/sitecustomize.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ def _inject():
_log("failed to load ddtrace module: %s" % e, level="error")
return
else:
# In injected environments, the profiler needs to know that it is only allowed to use the native exporter
os.environ["DD_PROFILING_EXPORT_LIBDD_REQUIRED"] = "true"

# This import has the same effect as ddtrace-run for the current process (auto-instrument all libraries).
import ddtrace.bootstrap.sitecustomize

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
fixes:
- |
Profiling: This fix resolves an issue where the profiler was forcing protobuf to load in injected environments,
causing crashes in configurations which relied on older protobuf versions. The profiler will now detect
when injection is used and try loading with the native exporter. If that fails, it will self-disable
rather than loading protobuf.
Loading