Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add telemetry to api server #28

Merged
merged 7 commits into from
Jan 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
build:
docker:
# specify the version here
- image: circleci/elixir:1.7
- image: circleci/elixir:1.11

# Specify service dependencies here if necessary
# CircleCI maintains a library of pre-built images
Expand All @@ -22,5 +22,4 @@ jobs:
- run: mix local.rebar --force
- run: mix deps.get
- run: mix compile --warnings-as-errors
- run: mix coveralls.circle
- run: mix inch.report
- run: mix test
70 changes: 70 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,76 @@ priority sampling documentation].

[Datadog priority sampling documentation]: https://docs.datadoghq.com/tracing/getting_further/trace_sampling_and_storage/#priority-sampling-for-distributed-tracing

## Telemetry

This library includes [telemetry] events that can be used to inspect the
performance and operations involved in sending trace data to Datadog. Please
refer to the [telemetry] documentation to see how to attach to these events
using the standard conventions. The following events are currently exposed:

[telemetry]: https://github.com/beam-telemetry/telemetry

### `[:spandex_datadog, :send_trace, :start]`

This event is executed when the `SpandexDatadog.ApiServer.send_trace/2` API
function is called, typically as a result of the `finish_trace/1` function
being called on your `Tracer` implementation module.

**NOTE:** This event is executed in the same process that is calling this API
function, but at this point, the `Spandex.Trace` data has already been passed
into the `send_trace` function, and thus the active trace can no longer be
modified (for example, it is not possible to use this event to add a span to
represent this API itself having been called).

#### Measurements

* `:system_time`: The time (in native units) this event executed.

#### Metadata

* `trace`: The current `Spandex.Trace` that is being sent to the `ApiServer`.

### `[:spandex_datadog, :send_trace, :stop]`

This event is executed when the `SpandexDatadog.ApiServer.send_trace/2` API
function completes normally.

**NOTE:** This event is executed in the same process that is calling this API
function, but at this point, the `Spandex.Trace` data has already been passed
into the `send_trace` function, and thus the active trace can no longer be
modified (for example, it is not possible to use this event to add a span to
represent this API itself having been called).

#### Measurements

* `:duration`: The time (in native units) spent servicing the API call.

#### Metadata

* `trace`: The `Spandex.Trace` that was sent to the `ApiServer`.

### `[:spandex_datadog, :send_trace, :exception]`

This event is executed when the `SpandexDatadog.ApiServer.send_trace/2` API
function ends prematurely due to an error or exit.

**NOTE:** This event is executed in the same process that is calling this API
function, but at this point, the `Spandex.Trace` data has already been passed
into the `send_trace` function, and thus the active trace can no longer be
modified (for example, it is not possible to use this event to add a span to
represent this API itself having been called).

#### Measurements

* `:duration`: The time (in native units) spent servicing the API call.

#### Metadata

* `trace`: The current `Spandex.Trace` that is being sent to the `ApiServer`.
* `:kind`: The kind of exception raised.
* `:error`: Error data associated with the relevant kind of exception.
* `:stacktrace`: The stacktrace associated with the exception.

## API Sender Performance

Originally, the library had an API server and spans were sent via
Expand Down
9 changes: 6 additions & 3 deletions lib/spandex_datadog/api_server.ex
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ defmodule SpandexDatadog.ApiServer do
"""
@spec init(opts :: Keyword.t()) :: {:ok, State.t()}
def init(opts) do
{:ok, agent_pid} = Agent.start_link(fn -> 0 end, name: :spandex_currently_send_count)
{:ok, agent_pid} = Agent.start_link(fn -> 0 end)

state = %State{
asynchronous_send?: true,
Expand All @@ -106,8 +106,11 @@ defmodule SpandexDatadog.ApiServer do
"""
@spec send_trace(Trace.t(), Keyword.t()) :: :ok
def send_trace(%Trace{} = trace, opts \\ []) do
timeout = Keyword.get(opts, :timeout, 30_000)
GenServer.call(__MODULE__, {:send_trace, trace}, timeout)
:telemetry.span([:spandex_datadog, :send_trace], %{trace: trace}, fn ->
timeout = Keyword.get(opts, :timeout, 30_000)
result = GenServer.call(__MODULE__, {:send_trace, trace}, timeout)
{result, %{trace: trace}}
end)
end

@deprecated "Please use send_trace/2 instead"
Expand Down
9 changes: 2 additions & 7 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,7 @@ defmodule SpandexDatadog.MixProject do
deps: deps(),
elixirc_paths: elixirc_paths(Mix.env()),
docs: docs(),
package: package(),
test_coverage: [tool: ExCoveralls],
preferred_cli_env: [
"coveralls.circle": :test,
coveralls: :test
]
package: package()
]
end

Expand Down Expand Up @@ -58,10 +53,10 @@ defmodule SpandexDatadog.MixProject do
defp deps do
[
{:ex_doc, ">= 0.0.0", only: :dev},
{:excoveralls, "~> 0.10", only: :test},
{:git_ops, "~> 2.0", only: [:dev]},
{:inch_ex, "~> 2.0", only: [:dev, :test]},
{:spandex, "~> 3.0"},
{:telemetry, "~> 0.4"},
{:httpoison, "~> 0.13", only: :test},
{:msgpax, "~> 2.2.1"}
]
Expand Down
1 change: 1 addition & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,6 @@
"plug_crypto": {:hex, :plug_crypto, "1.1.2", "bdd187572cc26dbd95b87136290425f2b580a116d3fb1f564216918c9730d227", [:mix], [], "hexpm", "6b8b608f895b6ffcfad49c37c7883e8df98ae19c6a28113b02aa1e9c5b22d6b5"},
"spandex": {:hex, :spandex, "3.0.1", "1979e96372cef16dd7a9b079da5b21647d6d7f2d3e6aa544d9ce5638e9350cb5", [:mix], [{:decorator, "~> 1.2", [hex: :decorator, repo: "hexpm", optional: true]}, {:optimal, "~> 0.3.3", [hex: :optimal, repo: "hexpm", optional: false]}, {:plug, ">= 1.0.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "22a5046b67d751cf4bbebf1ee23d977d8e05ad750ea578070ba6e5b293b64ff2"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm", "603561dc0fd62f4f2ea9b890f4e20e1a0d388746d6e20557cafb1b16950de88c"},
"telemetry": {:hex, :telemetry, "0.4.2", "2808c992455e08d6177322f14d3bdb6b625fbcfd233a73505870d8738a2f4599", [:rebar3], [], "hexpm", "2d1419bd9dda6a206d7b5852179511722e2b18812310d304620c7bd92a13fcef"},
"unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm", "1d1848c40487cdb0b30e8ed975e34e025860c02e419cb615d255849f3427439d"},
}
76 changes: 74 additions & 2 deletions test/api_server_test.exs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
defmodule SpandexDatadog.ApiServerTest do
use ExUnit.Case
use ExUnit.Case, async: false

import ExUnit.CaptureLog

Expand All @@ -24,8 +24,21 @@ defmodule SpandexDatadog.ApiServerTest do
end
end

defmodule TestSlowApiServer do
def put(_url, _body, _headers) do
Process.sleep(500)
{:error, :timeout}
end
end

defmodule TelemetryRecorderPDict do
def handle_event(event, measurements, metadata, _cfg) do
Process.put(event, {measurements, metadata})
end
end

setup_all do
{:ok, agent_pid} = Agent.start_link(fn -> 0 end, name: :spandex_currently_send_count)
{:ok, agent_pid} = Agent.start_link(fn -> 0 end)
trace_id = 4_743_028_846_331_200_905

{:ok, span_1} =
Expand Down Expand Up @@ -84,6 +97,65 @@ defmodule SpandexDatadog.ApiServerTest do
}
end

describe "ApiServer.send_trace/2" do
test "executes telemetry on success", %{trace: trace} do
:telemetry.attach_many(
"log-response-handler",
[
[:spandex_datadog, :send_trace, :start],
[:spandex_datadog, :send_trace, :stop],
[:spandex_datadog, :send_trace, :exception]
],
&TelemetryRecorderPDict.handle_event/4,
nil
)

ApiServer.start_link(http: TestOkApiServer)

ApiServer.send_trace(trace)

{start_measurements, start_metadata} = Process.get([:spandex_datadog, :send_trace, :start])
assert start_measurements[:system_time]
assert trace == start_metadata[:trace]

{stop_measurements, stop_metadata} = Process.get([:spandex_datadog, :send_trace, :stop])
assert stop_measurements[:duration]
assert trace == stop_metadata[:trace]

refute Process.get([:spandex_datadog, :send_trace, :exception])
end

test "executes telemetry on exception", %{trace: trace} do
:telemetry.attach_many(
"log-response-handler",
[
[:spandex_datadog, :send_trace, :start],
[:spandex_datadog, :send_trace, :stop],
[:spandex_datadog, :send_trace, :exception]
],
&TelemetryRecorderPDict.handle_event/4,
nil
)

ApiServer.start_link(http: TestSlowApiServer, batch_size: 0, sync_threshold: 0)

catch_exit(ApiServer.send_trace(trace, timeout: 1))

{start_measurements, start_metadata} = Process.get([:spandex_datadog, :send_trace, :start])
assert start_measurements[:system_time]
assert trace == start_metadata[:trace]

refute Process.get([:spandex_datadog, :send_trace, :stop])

{exception_measurements, exception_metadata} = Process.get([:spandex_datadog, :send_trace, :exception])
assert exception_measurements[:duration]
assert trace == start_metadata[:trace]
assert :exit == exception_metadata[:kind]
assert nil == exception_metadata[:error]
assert is_list(exception_metadata[:stacktrace])
end
end

describe "ApiServer.handle_call/3 - :send_trace" do
test "doesn't log anything when verbose?: false", %{trace: trace, state: state, url: url} do
log =
Expand Down