Skip to content

Commit

Permalink
Merge pull request #675 from talex5/declutter-trace
Browse files Browse the repository at this point in the history
Improve traces
  • Loading branch information
talex5 authored Jan 27, 2024
2 parents b68d396 + 4a91a67 commit 0776a7d
Show file tree
Hide file tree
Showing 19 changed files with 446 additions and 314 deletions.
69 changes: 15 additions & 54 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,70 +220,30 @@ Calling an operation that performs an effect (such as `yield`) can switch to a d
When OCaml's tracing is turned on, Eio writes events about many actions,
such as creating fibers or resolving promises.

[examples/trace](./examples/trace/) shows how to consume the events manually:
You can use [eio-trace][] to capture a trace and display it in a window.
For example, this is a trace of the counting example above:

<!-- $MDX skip -->
```sh
$ dune exec -- ./examples/trace/main.exe
+tracer: starting
30926487700447:ring 0: create fiber 0
30926487702032:ring 0: running fiber 0
30926487705057:ring 0: create switch 1
30926487707264:ring 0: create fiber 2
30926487707512:ring 0: running fiber 2
30926487720213:ring 0: log "tracer: starting"
+server: starting
+client: connecting socket...
+server: got connection from client
+server: read "Hello" from socket
30926487769298:ring 0: running fiber 0
30926487769877:ring 0: create fiber 3
30926487770083:ring 0: running fiber 3
30926487771198:ring 0: create switch 4
30926487807888:ring 0: create switch 5
30926487808329:ring 0: create fiber 6
30926487808555:ring 0: running fiber 6
30926487812219:ring 0: log "server: starting"
30926487818883:ring 0: running fiber 3
30926487819091:ring 0: create fiber 7
30926487819155:ring 0: running fiber 7
30926487822428:ring 0: log "client: connecting socket..."
30926487901604:ring 0: running fiber 3
30926487904947:ring 0: running fiber 0
30926487907318:ring 0: running fiber 6
30926487917202:ring 0: log "server: got connection from client"
30926487929993:ring 0: running fiber 6
30926487941403:ring 0: running fiber 7
30926487948000:ring 0: running fiber 7
30926487971898:ring 0: resolve 7
30926487974810:ring 0: running fiber 6
30926487975215:ring 0: running fiber 6
30926487977869:ring 0: running fiber 6
30926487984514:ring 0: log "server: read \"Hello\" from socket"
30926487990785:ring 0: resolve 6
30926487991752:ring 0: running fiber 3
30926488022310:ring 0: resolve 3
30926497839725:ring 0: running fiber 2
+tracer: stopping
```
dune build ./examples
eio-trace run -- ./_build/default/examples/both/main.exe
```

<p align='center'>
<img src="./doc/traces/both-posix.svg"/>
</p>

The upper horizontal bar is the initial fiber, and the brackets show `Fiber.both` creating a second fiber.
The green segments show when each fiber is running.
Note that the output from `traceln` appears in the trace as well as on the console.
In the eio-trace window, scrolling with the mouse or touchpad will zoom in or out of the diagram.

There are various third-party tools that can consume this data
There are various third-party tools that can also consume this data
(but may currently require patches to support the new system):

- [Meio][] (Monitoring for Eio) provides an interactive console-based UI for exploring running fibers.
- [Olly][] can save Perfetto traces and report statistics.
- [mirage-trace-viewer][] renders traces visually.

For example, this is how mirage-trace-viewer renders the counting example above:

<p align='center'>
<img src="./doc/trace.svg"/>
</p>

This shows the two counting threads as two horizonal lines.
The white regions indicate when each thread was running.
[examples/trace](./examples/trace/) shows how to consume the events manually.

## Cancellation

Expand Down Expand Up @@ -1893,3 +1853,4 @@ Some background about the effects system can be found in:
[Eio.Process]: https://ocaml-multicore.github.io/eio/eio/Eio/Process/index.html
[Dev meetings]: https://docs.google.com/document/d/1ZBfbjAkvEkv9ldumpZV5VXrEc_HpPeYjHPW_TiwJe4Q
[Olly]: https://github.com/tarides/runtime_events_tools
[eio-trace]: https://github.com/ocaml-multicore/eio-trace
223 changes: 0 additions & 223 deletions doc/trace.svg

This file was deleted.

2 changes: 2 additions & 0 deletions doc/traces/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
all:
eio-trace render -f both-posix.fxt both-posix.svg
Binary file added doc/traces/both-posix.fxt
Binary file not shown.
372 changes: 372 additions & 0 deletions doc/traces/both-posix.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions examples/both/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
(executable
(name main)
(libraries eio_main))
7 changes: 7 additions & 0 deletions examples/both/main.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
open Eio.Std

let () =
Eio_main.run @@ fun _env ->
Fiber.both
(fun () -> for x = 1 to 3 do traceln "x = %d" x; Fiber.yield () done)
(fun () -> for y = 1 to 3 do traceln "y = %d" y; Fiber.yield () done)
1 change: 1 addition & 0 deletions lib_eio/core/debug.ml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ let traceln_mutex = Mutex.create ()

let default_traceln ?__POS__:pos fmt =
let k go =
Trace.with_span "traceln" @@ fun () ->
let b = Buffer.create 512 in
let f = Format.formatter_of_buffer b in
go f;
Expand Down
8 changes: 8 additions & 0 deletions lib_eio/core/single_waiter.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,11 @@ let await t op id =
in
Trace.get id;
x

let await_protect t op id =
let x =
Suspend.enter_unchecked op @@ fun _ctx enqueue ->
t.wake <- (fun x -> t.wake <- ignore; enqueue x)
in
Trace.get id;
x
6 changes: 2 additions & 4 deletions lib_eio/core/switch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ let rec await_idle t =
(* Wait for fibers to finish: *)
while t.fibers > 0 do
Trace.try_get t.cancel.id;
Single_waiter.await t.waiter "Switch.await_idle" t.cancel.id
Single_waiter.await_protect t.waiter "Switch.await_idle" t.cancel.id
done;
(* Call on_release handlers: *)
let queue = Lwt_dllist.create () in
Expand All @@ -102,15 +102,13 @@ let rec await_idle t =
| None -> await_idle t
| Some fn ->
begin
try fn () with
try Cancel.protect fn with
| ex -> fail t ex
end;
release ()
in
release ()

let await_idle t = Cancel.protect (fun _ -> await_idle t)

let maybe_raise_exs t =
match t.exs with
| None -> ()
Expand Down
6 changes: 4 additions & 2 deletions lib_eio/unix/eio_unix.mli
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,11 @@ val sleep : float -> unit
without having to plumb {!Eio.Stdenv.mono_clock} through your code.
It can also be used in programs that don't care about tracking determinism. *)

val run_in_systhread : (unit -> 'a) -> 'a
val run_in_systhread : ?label:string -> (unit -> 'a) -> 'a
(** [run_in_systhread fn] runs the function [fn] in a newly created system thread (a {! Thread.t}).
This allows blocking calls to be made non-blocking. *)
This allows blocking calls to be made non-blocking.
@param label The operation name to use in trace output. *)

val pipe : Switch.t -> source_ty r * sink_ty r
(** [pipe sw] returns a connected pair of flows [src] and [sink]. Data written to [sink]
Expand Down
2 changes: 1 addition & 1 deletion lib_eio/unix/net.ml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ let getnameinfo (sockaddr : Eio.Net.Sockaddr.t) =
| `Udp _ -> [Unix.NI_DGRAM]
in
let sockaddr = sockaddr_to_unix sockaddr in
Private.run_in_systhread (fun () ->
Private.run_in_systhread ~label:"getnameinfo" (fun () ->
let Unix.{ni_hostname; ni_service} = Unix.getnameinfo sockaddr options in
(ni_hostname, ni_service))

Expand Down
13 changes: 4 additions & 9 deletions lib_eio/unix/private.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,7 @@ let pipe sw = Effect.perform (Pipe sw)
module Rcfd = Rcfd
module Fork_action = Fork_action

let run_in_systhread fn =
let f fiber enqueue =
match Eio.Private.Fiber_context.get_error fiber with
| Some err -> enqueue (Error err)
| None ->
let _t : Thread.t = Thread.create (fun () -> enqueue (try Ok (fn ()) with exn -> Error exn)) () in
()
in
Effect.perform (Eio.Private.Effects.Suspend f)
let run_in_systhread ?(label="systhread") fn =
Eio.Private.Suspend.enter label @@ fun _ctx enqueue ->
let _t : Thread.t = Thread.create (fun () -> enqueue (try Ok (fn ()) with exn -> Error exn)) () in
()
7 changes: 5 additions & 2 deletions lib_eio/unix/rcfd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ let get t =
put t;
None

let close_fd fd =
Eio.Private.Trace.with_span "close" (fun () -> Unix.close fd)

(* Note: we could simplify this a bit by incrementing [t.ops], as [remove] does.
However, that makes dscheck too slow. *)
let close t =
Expand All @@ -105,14 +108,14 @@ let close t =
(* Another caller closed [t] before us. *)
false
| Open fd as prev ->
let next = Closing (fun () -> Unix.close fd) in
let next = Closing (fun () -> close_fd fd) in
if Atomic.compare_and_set t.fd prev next then (
(* We just transitioned from [open] to [closing/users] or [closing/no-users].
We are now the closer. *)
if Atomic.get t.ops = 0 && Atomic.compare_and_set t.fd next fully_closed then (
(* We were in [closing/no-users] and are now in [closing/closed].
We own the FD (and our original callback will never be called). *)
Unix.close fd
close_fd fd
) else (
(* The [next] callback remained installed and there is nothing left for us to do:
- If [t.ops] was non-zero, another thread will eventually return it to zero and call our callback.
Expand Down
8 changes: 4 additions & 4 deletions lib_eio_linux/low_level.ml
Original file line number Diff line number Diff line change
Expand Up @@ -341,11 +341,11 @@ let lseek fd off cmd =

let fsync fd =
(* todo: https://github.com/ocaml-multicore/ocaml-uring/pull/103 *)
Eio_unix.run_in_systhread @@ fun () ->
Eio_unix.run_in_systhread ~label:"fsync" @@ fun () ->
Fd.use_exn "fsync" fd Unix.fsync

let ftruncate fd len =
Eio_unix.run_in_systhread @@ fun () ->
Eio_unix.run_in_systhread ~label:"ftruncate" @@ fun () ->
Fd.use_exn "ftruncate" fd @@ fun fd ->
Unix.LargeFile.ftruncate fd (Optint.Int63.to_int64 len)

Expand Down Expand Up @@ -479,7 +479,7 @@ let read_dir fd =
let files = List.filter (function ".." | "." -> false | _ -> true) files in
read_all (files @ acc) fd
in
Eio_unix.run_in_systhread (fun () -> read_all [] fd)
Eio_unix.run_in_systhread ~label:"read_dir" (fun () -> read_all [] fd)

(* https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml *)
let getaddrinfo ~service node =
Expand All @@ -494,7 +494,7 @@ let getaddrinfo ~service node =
| _ -> None)
| _ -> None
in
Eio_unix.run_in_systhread @@ fun () ->
Eio_unix.run_in_systhread ~label:"getaddrinfo" @@ fun () ->
Unix.getaddrinfo node service []
|> List.filter_map to_eio_sockaddr_t

Expand Down
7 changes: 5 additions & 2 deletions lib_eio_linux/sched.ml
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,14 @@ let enter op fn =
Trace.suspend_fiber op;
Effect.perform (Enter fn)

let submit uring =
Trace.with_span "submit" (fun () -> Uring.submit uring)

let rec enqueue_job t fn =
match fn () with
| Some _ as r -> r
| None ->
if Uring.submit t.uring > 0 then enqueue_job t fn
if submit t.uring > 0 then enqueue_job t fn
else None

(* Cancellations always come from the same domain, so no need to send wake events here. *)
Expand Down Expand Up @@ -219,7 +222,7 @@ let rec schedule ({run_q; sleep_q; mem_q; uring; _} as st) : [`Exit_scheduler] =
Lf_queue.push run_q IO; (* Re-inject IO job in the run queue *)
handle_complete st ~runnable result
| None ->
ignore (Uring.submit uring : int);
ignore (submit uring : int);
let timeout =
match next_due with
| `Wait_until time ->
Expand Down
22 changes: 11 additions & 11 deletions lib_eio_posix/low_level.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ module Trace = Eio.Private.Trace
module Fiber_context = Eio.Private.Fiber_context

(* todo: keeping a pool of workers is probably faster *)
let in_worker_thread = Eio_unix.run_in_systhread
let in_worker_thread label = Eio_unix.run_in_systhread ~label

let await_readable op fd =
Fd.use_exn "await_readable" fd @@ fun fd ->
Expand Down Expand Up @@ -115,11 +115,11 @@ let getrandom { Cstruct.buffer; off; len } =
else
loop (n + eio_getrandom buffer (off + n) (len - n))
in
in_worker_thread @@ fun () ->
in_worker_thread "getrandom" @@ fun () ->
loop 0

let realpath path =
in_worker_thread @@ fun () ->
in_worker_thread "realpath" @@ fun () ->
Unix.realpath path

let read_entries h =
Expand All @@ -132,7 +132,7 @@ let read_entries h =
aux []

let readdir path =
in_worker_thread @@ fun () ->
in_worker_thread "readdir" @@ fun () ->
let h = Unix.opendir path in
match read_entries h with
| r -> Unix.closedir h; r
Expand Down Expand Up @@ -201,29 +201,29 @@ external eio_openat : Unix.file_descr -> string -> Open_flags.t -> int -> Unix.f
let openat ?dirfd ~sw ~mode path flags =
with_dirfd "openat" dirfd @@ fun dirfd ->
Switch.check sw;
in_worker_thread (fun () -> eio_openat dirfd path Open_flags.(flags + cloexec + nonblock) mode)
in_worker_thread "openat" (fun () -> eio_openat dirfd path Open_flags.(flags + cloexec + nonblock) mode)
|> Fd.of_unix ~sw ~blocking:false ~close_unix:true

external eio_mkdirat : Unix.file_descr -> string -> Unix.file_perm -> unit = "caml_eio_posix_mkdirat"

let mkdir ?dirfd ~mode path =
with_dirfd "mkdirat" dirfd @@ fun dirfd ->
in_worker_thread @@ fun () ->
in_worker_thread "mkdir" @@ fun () ->
eio_mkdirat dirfd path mode

external eio_unlinkat : Unix.file_descr -> string -> bool -> unit = "caml_eio_posix_unlinkat"

let unlink ?dirfd ~dir path =
with_dirfd "unlink" dirfd @@ fun dirfd ->
in_worker_thread @@ fun () ->
in_worker_thread "unlink" @@ fun () ->
eio_unlinkat dirfd path dir

external eio_renameat : Unix.file_descr -> string -> Unix.file_descr -> string -> unit = "caml_eio_posix_renameat"

let rename ?old_dir old_path ?new_dir new_path =
with_dirfd "rename-old" old_dir @@ fun old_dir ->
with_dirfd "rename-new" new_dir @@ fun new_dir ->
in_worker_thread @@ fun () ->
in_worker_thread "rename" @@ fun () ->
eio_renameat old_dir old_path new_dir new_path

type stat
Expand All @@ -236,7 +236,7 @@ let fstat ~buf fd =
eio_fstat buf fd

let fstatat ~buf ?dirfd ~follow path =
in_worker_thread @@ fun () ->
in_worker_thread "fstat" @@ fun () ->
let flags = if follow then 0 else Config.at_symlink_nofollow in
with_dirfd "fstatat" dirfd @@ fun dirfd ->
eio_fstatat buf dirfd path flags
Expand Down Expand Up @@ -273,11 +273,11 @@ let lseek fd off cmd =
|> Optint.Int63.of_int64

let fsync fd =
Eio_unix.run_in_systhread @@ fun () ->
Eio_unix.run_in_systhread ~label:"fsync" @@ fun () ->
Fd.use_exn "fsync" fd Unix.fsync

let ftruncate fd len =
Eio_unix.run_in_systhread @@ fun () ->
Eio_unix.run_in_systhread ~label:"ftruncate" @@ fun () ->
Fd.use_exn "ftruncate" fd @@ fun fd ->
Unix.LargeFile.ftruncate fd (Optint.Int63.to_int64 len)

Expand Down
2 changes: 1 addition & 1 deletion lib_eio_posix/net.ml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ let getaddrinfo ~service node =
| _ -> None)
| _ -> None
in
Err.run Eio_unix.run_in_systhread @@ fun () ->
Err.run (Eio_unix.run_in_systhread ~label:"getaddrinfo") @@ fun () ->
let rec aux () =
try
Unix.getaddrinfo node service []
Expand Down
2 changes: 1 addition & 1 deletion lib_eio_windows/net.ml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ let getaddrinfo ~service node =
| _ -> None)
| _ -> None
in
Err.run Eio_unix.run_in_systhread @@ fun () ->
Err.run (Eio_unix.run_in_systhread ~label:"getaddrinfo") @@ fun () ->
let rec aux () =
try
Unix.getaddrinfo node service []
Expand Down

0 comments on commit 0776a7d

Please sign in to comment.