Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved function arity analysis #1397

Merged
merged 8 commits into from
Mar 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* Compiler: stop parsing the builtin js runtime if not necessary
* Compiler: improve js pretty printer (#1405)
* Compiler: improve debug location and speedup compilation (#1407)
* Compiler: improve analysis for more direct calls (#1397)
* Toplevel: Enable separate compilation of toplevels
* Runtime: js backtrace recording controlled by OCAMLRUNPARAM
* Runtime: support for zstd decompression of marshalled data (ocaml.5.1) (#12006)
Expand Down
38 changes: 30 additions & 8 deletions compiler/lib/driver.ml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ let debug = Debug.find "main"

let times = Debug.find "times"

(* Optimization profile of a compilation. [O1], [O2] and [O3] are the
   profiles registered for optimization levels 1, 2 and 3 respectively. *)
type profile =
| O1
| O2
| O3

(* [should_export wrap_with_fun] is [false] for an [`Iife] wrapper and
   [true] for a [`Named _] or [`Anonymous] one.
   NOTE(review): presumably tells whether the generated wrapper has to
   expose the program's exports -- confirm against the callers. *)
let should_export wrap_with_fun =
  match wrap_with_fun with
  | `Named _ | `Anonymous -> true
  | `Iife -> false
Expand All @@ -49,7 +54,7 @@ let inline p =

let specialize_1 (p, info) =
if debug () then Format.eprintf "Specialize...@.";
Specialize.f info p
Specialize.f ~function_arity:(fun f -> Specialize.function_arity info f) p

let specialize_js (p, info) =
if debug () then Format.eprintf "Specialize js...@.";
Expand Down Expand Up @@ -91,6 +96,18 @@ let effects p =
p |> Deadcode.f +> Effects.f +> map_fst Lambda_lifting.f)
else p, (Code.Var.Set.empty : Effects.cps_calls)

(* [exact_calls profile p] re-runs [Specialize.f] on [p] using the function
   arities computed by [Global_flow].

   When the effects flag is set, the program is returned unchanged.
   Otherwise the global flow analysis is run in its fast mode for [O1] and
   [O2], and in its non-fast mode for [O3]. *)
let exact_calls profile p =
  if Config.Flag.effects ()
  then p
  else
    let fast =
      match profile with
      | O1 | O2 -> true
      | O3 -> false
    in
    let flow_info = Global_flow.f ~fast p in
    let function_arity f = Global_flow.function_arity flow_info f in
    Specialize.f ~function_arity p

(* Identity pass on the program. When the "main" debug flag is enabled it
   first calls [Code.Print.program] on [p], with an annotation callback
   that always yields the empty string. *)
let print p =
  let () = if debug () then Code.Print.program (fun _ _ -> "") p in
  p
Expand Down Expand Up @@ -551,8 +568,6 @@ let configure formatter =
Code.Var.set_pretty (pretty && not (Config.Flag.shortvar ()));
Code.Var.set_stable (Config.Flag.stable_var ())

type profile = Code.program -> Code.program

let full
~standalone
~wrap_with_fun
Expand All @@ -565,7 +580,14 @@ let full
p =
let exported_runtime = not standalone in
let opt =
specialize_js_once +> profile +> effects +> map_fst (Generate_closure.f +> deadcode')
specialize_js_once
+> (match profile with
| O1 -> o1
| O2 -> o2
| O3 -> o3)
+> exact_calls profile
+> effects
+> map_fst (Generate_closure.f +> deadcode')
in
let emit =
generate d ~exported_runtime ~wrap_with_fun ~warn_on_unhandled_effect:standalone
Expand Down Expand Up @@ -607,7 +629,7 @@ let full_no_source_map
let f
?(standalone = true)
?(wrap_with_fun = `Iife)
?(profile = o1)
?(profile = O1)
?(linkall = false)
?source_map
?custom_header
Expand All @@ -628,7 +650,7 @@ let f
let f'
?(standalone = true)
?(wrap_with_fun = `Iife)
?(profile = o1)
?(profile = O1)
?(linkall = false)
?custom_header
formatter
Expand All @@ -649,13 +671,13 @@ let from_string ~prims ~debug s formatter =
full_no_source_map
~standalone:false
~wrap_with_fun:`Anonymous
~profile:o1
~profile:O1
~linkall:false
~custom_header:None
formatter
d
p

let profiles = [ 1, o1; 2, o2; 3, o3 ]
let profiles = [ 1, O1; 2, O2; 3, O3 ]

(* [profile i] is the optimization profile associated with level [i] in
   [profiles], or [None] when no profile is registered for [i]. *)
let profile i =
  match List.assoc i profiles with
  | p -> Some p
  | exception Not_found -> None
2 changes: 1 addition & 1 deletion compiler/lib/effects.ml
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ let remove_empty_blocks ~live_vars (p : Code.program) : Code.program =
let f (p, live_vars) =
let t = Timer.make () in
let p = remove_empty_blocks ~live_vars p in
let flow_info = Global_flow.f p in
let flow_info = Global_flow.f ~fast:false p in
let cps_needed = Partial_cps_analysis.f p flow_info in
let p, cps_needed = rewrite_toplevel ~cps_needed p in
let p = split_blocks ~cps_needed p in
Expand Down
9 changes: 8 additions & 1 deletion compiler/lib/generate.ml
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,14 @@ let parallel_renaming params args continuation queue =

let apply_fun_raw ctx f params exact cps =
let n = List.length params in
let apply_directly = J.call f params J.N in
let apply_directly =
(* Make sure we are performing a regular call, not a (slower)
method call *)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vouillon, can you provide a reference for this optimization? I did some tests and they seem to show the opposite.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have any reference, but without this, the improved analysis resulted in slower code.
I just tried compiling ocamlc.byte (dune exec -- js_of_ocaml --opt 3 $(which ocamlc.byte) -o /tmp/ocamlc.js).
And then running it on some ml source files:

time node /tmp/ocamlc.js -c ./benchmarks/sources/ml/*.ml ./benchmarks/sources/ml/*.ml ./benchmarks/sources/ml/*.ml

This optimization makes close to an 8% performance improvement on my machine.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which OCaml version are you using? It seems that ocamlc no longer works when compiled to JS due to ocaml/ocaml#11997 (since OCaml 5.1). The reason is that Reloc_literal can now contain floats that jsoo doesn't want to marshal.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried with OCaml 4.14.0.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This optimization makes close to an 8% performance improvement on my machine.

Results are too noisy on my laptop but I can see some improvements indeed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm using python3 -m pyperf system tune to reduce the noise.

match f with
| J.EAccess _ | J.EDot _ ->
J.call (J.dot f (Utf8_string.of_string_exn "call")) (s_var "null" :: params) J.N
| _ -> J.call f params J.N
in
let apply =
(* We skip the arity check when we know that we have the right
number of parameters, since this test is expensive. *)
Expand Down
128 changes: 83 additions & 45 deletions compiler/lib/global_flow.ml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ type state =
; applied_functions : (Var.t * Var.t, unit) Hashtbl.t
(* Functions that have been already considered at a call site.
This is to avoid repeated computations *)
; fast : bool
}

(* Add the variable [x] to the set [st.vars] of the analysis state. *)
let add_var st x = Var.ISet.add st.vars x
Expand All @@ -126,7 +127,8 @@ let add_assign_def st x y =
let add_param_def st x =
add_var st x;
let idx = Var.idx x in
assert (is_undefined st.defs.(idx))
assert (is_undefined st.defs.(idx));
if st.fast then st.defs.(idx) <- Phi { known = Var.Set.empty; others = true }

let rec arg_deps st ?ignore params args =
match params, args with
Expand Down Expand Up @@ -155,6 +157,12 @@ let expr_deps blocks st x e =
(* The analysis knows about these primitives, and will compute
an approximation of the value they return based on an
approximation of their arguments *)
(if st.fast
then
match l with
| Pv x :: _ -> do_escape st Escape x
| Pc _ :: _ -> ()
| [] -> assert false);
List.iter
~f:(fun a ->
match a with
Expand Down Expand Up @@ -207,7 +215,7 @@ let expr_deps blocks st x e =
match st.defs.(Var.idx f) with
| Expr (Closure (params, _)) when List.length args = List.length params ->
Hashtbl.add st.applied_functions (x, f) ();
List.iter2 ~f:(fun p a -> add_assign_def st p a) params args;
if not st.fast then List.iter2 ~f:(fun p a -> add_assign_def st p a) params args;
Var.Set.iter (fun y -> add_dep st x y) (Var.Map.find f st.return_values)
| _ -> ())
| Closure (l, cont) ->
Expand Down Expand Up @@ -243,17 +251,19 @@ let program_deps st { blocks; _ } =
~f:(fun i (pc, _) ->
Hashtbl.replace h pc (i :: (try Hashtbl.find h pc with Not_found -> [])))
a2;
Hashtbl.iter
(fun pc tags ->
let block = Addr.Map.find pc blocks in
List.iter
~f:(fun (i, _) ->
match i with
| Let (y, Field (x', _)) when Var.equal x x' ->
Hashtbl.add st.known_cases y tags
| _ -> ())
block.body)
h
if not st.fast
then
Hashtbl.iter
(fun pc tags ->
let block = Addr.Map.find pc blocks in
List.iter
~f:(fun (i, _) ->
match i with
| Let (y, Field (x', _)) when Var.equal x x' ->
Hashtbl.add st.known_cases y tags
| _ -> ())
block.body)
h
| Pushtrap (cont, x, cont_h, _) ->
add_var st x;
st.defs.(Var.idx x) <- Phi { known = Var.Set.empty; others = true };
Expand Down Expand Up @@ -406,31 +416,34 @@ let propagate st ~update approx x =
| Top -> Top)
| Prim (Extern "caml_check_bound", [ Pv y; _ ]) -> Var.Tbl.get approx y
| Prim ((Array_get | Extern "caml_array_unsafe_get"), [ Pv y; _ ]) -> (
match Var.Tbl.get approx y with
| Values { known; others } ->
Domain.join_set
~update
~st
~approx
~others
(fun z ->
match st.defs.(Var.idx z) with
| Expr (Block (_, lst, _)) ->
Array.iter ~f:(fun t -> add_dep st x t) lst;
let a =
Array.fold_left
~f:(fun acc t ->
Domain.join ~update ~st ~approx (Var.Tbl.get approx t) acc)
~init:Domain.bot
lst
in
if st.possibly_mutable.(Var.idx z)
then Domain.join ~update ~st ~approx Domain.others a
else a
| Expr (Closure _) -> Domain.bot
| Phi _ | Expr _ -> assert false)
known
| Top -> Top)
if st.fast
then Domain.others
else
match Var.Tbl.get approx y with
| Values { known; others } ->
Domain.join_set
~update
~st
~approx
~others
(fun z ->
match st.defs.(Var.idx z) with
| Expr (Block (_, lst, _)) ->
Array.iter ~f:(fun t -> add_dep st x t) lst;
let a =
Array.fold_left
~f:(fun acc t ->
Domain.join ~update ~st ~approx (Var.Tbl.get approx t) acc)
~init:Domain.bot
lst
in
if st.possibly_mutable.(Var.idx z)
then Domain.join ~update ~st ~approx Domain.others a
else a
| Expr (Closure _) -> Domain.bot
| Phi _ | Expr _ -> assert false)
known
| Top -> Top)
| Prim (Array_get, _) -> Domain.others
| Prim ((Vectlength | Not | IsInt | Eq | Neq | Lt | Le | Ult), _) ->
(* The result of these primitives is neither a function nor a
Expand All @@ -457,12 +470,14 @@ let propagate st ~update approx x =
if not (Hashtbl.mem st.applied_functions (x, g))
then (
Hashtbl.add st.applied_functions (x, g) ();
List.iter2
~f:(fun p a ->
add_assign_def st p a;
update ~children:false p)
params
args;
if not st.fast
then
List.iter2
~f:(fun p a ->
add_assign_def st p a;
update ~children:false p)
params
args;
Var.Set.iter
(fun y -> add_dep st x y)
(Var.Map.find g st.return_values));
Expand Down Expand Up @@ -527,7 +542,7 @@ type info =
; info_may_escape : bool array
}

let f p =
let f ~fast p =
let t = Timer.make () in
let t1 = Timer.make () in
let rets = return_values p in
Expand All @@ -550,6 +565,7 @@ let f p =
; possibly_mutable
; known_cases = Hashtbl.create 16
; applied_functions = Hashtbl.create 16
; fast
}
in
program_deps st p;
Expand Down Expand Up @@ -621,3 +637,25 @@ let exact_call info f n =
| Expr (Block _) -> true
| Expr _ | Phi _ -> assert false)
known

(* [function_arity info f] is [Some n] when every closure listed among the
   known values of [f] takes exactly [n] parameters, and [None] otherwise:
   when the approximation of [f] is [Top] or includes unknown values
   ([others = true]), when no known value is a closure, or when two known
   closures disagree on their number of parameters. Known blocks are
   ignored. *)
let function_arity info f =
  (* Accumulator: [None] while no closure has been seen, [Some (Some n)]
     while all closures seen so far take [n] parameters, and [Some None]
     as soon as two of them differ. *)
  let combine g seen =
    match info.info_defs.(Var.idx g) with
    | Expr (Block _) -> seen
    | Expr (Closure (params, _)) -> (
        let arity = List.length params in
        match seen with
        | None -> Some (Some arity)
        | Some (Some prev) when arity <> prev -> Some None
        | Some _ -> seen)
    | Expr _ | Phi _ -> assert false
  in
  match Var.Tbl.get info.info_approximation f with
  | Top | Values { others = true; _ } -> None
  | Values { known; others = false } -> (
      match Var.Set.fold combine known None with
      | None -> None
      | Some arity -> arity)
4 changes: 3 additions & 1 deletion compiler/lib/global_flow.mli
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ type info =
; info_may_escape : bool array
}

val f : Code.program -> info
val f : fast:bool -> Code.program -> info

val exact_call : info -> Var.t -> int -> bool

(** [function_arity info f] returns [Some n] when every closure that the
    analysis lists as a known value of [f] takes exactly [n] parameters.
    It returns [None] when the approximation of [f] is [Top] or includes
    unknown values, when no closure is known for [f], or when the known
    closures disagree on their number of parameters. *)
val function_arity : info -> Var.t -> int option
Loading