Skip to content

Commit

Permalink
[multicore] configurable caching policy for attributes
Browse files Browse the repository at this point in the history
Summary:
Caches are treated differently in process mode and in multicore mode:
In process mode, the caches are cleaned after every target, to avoid taking too much memory.
In multicore mode, the caches are shared, and therefore cleaning them reduces the benefit.

This diff makes the procedure attributes cache behave as before in process mode, but act as a bounded LRU cache in multicore mode.
We also add code to log hit rate statistics in a uniform manner for caches.
The default maximum size of the LRU cache in multicore mode (here, 500) was computed as the (rough) minimum that would guarantee the same hit rate in multicore mode as the (aggregate) hit rate in process mode, for selected code bases.

Reviewed By: davidpichardie

Differential Revision:
D69409519

Privacy Context Container: L1208441

fbshipit-source-id: 2d17f7386098ae33183f402f353cfea7ca9b520b
  • Loading branch information
ngorogiannis authored and facebook-github-bot committed Feb 12, 2025
1 parent 9b2c69e commit 3d7d662
Show file tree
Hide file tree
Showing 10 changed files with 193 additions and 7 deletions.
4 changes: 4 additions & 0 deletions infer/man/man1/infer-full.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2425,6 +2425,10 @@ INTERNAL OPTIONS
--append-buck-flavors-reset
Set --append-buck-flavors to the empty list.

--attributes-lru-max-size int
Specify size of procedure attribute LRU cache. Relevant only to
multicore mode. Defaults to 500

--backtrack-level int
Maximum level of backtracking to convert an absolute path to path
relative to the common prefix between the project root and the
Expand Down
13 changes: 8 additions & 5 deletions infer/src/IR/Attributes.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,21 @@ let find =
(run_query select_statement_adb)


module Cache = Concurrent.MakeMap (Procname.Map)
module Cache = Concurrent.MakeCache (struct
type t = Procname.t [@@deriving compare, equal, hash, show, sexp]
end)

let load, clear_cache, store =
let load, clear_cache, store, set_lru_limit =
(* capture DB attribute cache: only keeps positive entries as analysis may add entries *)
let cache : ProcAttributes.t Cache.t = Cache.empty () in
let cache : ProcAttributes.t Cache.t = Cache.create ~name:"attributes" in
let load_from_uid uid =
let result = find uid in
Option.iter result ~f:(fun attrs -> Cache.add cache (ProcAttributes.get_proc_name attrs) attrs) ;
result
in
let load pname =
Dependencies.record_pname_dep Other pname ;
match Cache.find_opt cache pname with
match Cache.lookup cache pname with
| Some _ as result ->
result
| None -> (
Expand Down Expand Up @@ -72,7 +74,8 @@ let load, clear_cache, store =
DBWriter.replace_attributes ~proc_uid ~proc_attributes ~cfg ~callees ~analysis ;
Cache.remove cache pname
in
(load, clear_cache, store)
let set_lru_limit ~lru_limit = Cache.set_lru_mode cache ~lru_limit in
(load, clear_cache, store, set_lru_limit)


let load_exn pname = Option.value_exn (load pname)
Expand Down
2 changes: 2 additions & 0 deletions infer/src/IR/Attributes.mli
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,5 @@ val load_formal_types : Procname.t -> Typ.t list

val clear_cache : unit -> unit
(** clear attribute cache *)

val set_lru_limit : lru_limit:int option -> unit
(** empty the attribute cache and bound its size to [lru_limit] from now on, or make it unbounded
    if [None] *)
3 changes: 2 additions & 1 deletion infer/src/backend/InferAnalyze.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ open TaskSchedulerTypes
let clear_caches () =
Summary.OnDisk.clear_cache () ;
BufferOverrunUtils.clear_cache () ;
Attributes.clear_cache () ;
if not Config.multicore then Attributes.clear_cache () ;
Dependencies.clear ()


Expand Down Expand Up @@ -159,6 +159,7 @@ let analyze replay_call_graph source_files_to_analyze =
, [MissingDependencies.get ()] ) )
else if Config.multicore then (
let pre_analysis_gc_stats = GCStats.get ~since:ProgramStart in
Attributes.set_lru_limit ~lru_limit:(Some Config.attributes_lru_max_size) ;
DomainPool.create ~jobs:Config.jobs ~f:analyze_target ~child_prologue:ignore
~child_epilogue:ignore ~tasks:(fun () ->
tasks_generator_builder_for replay_call_graph (Lazy.force source_files_to_analyze) )
Expand Down
62 changes: 62 additions & 0 deletions infer/src/base/Concurrent.ml
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,65 @@ struct

let wrap_hashtable hash = {mutex= Error_checking_mutex.create (); hash}
end

(** signature of a thread-safe (mutex-protected) cache from keys to values of an arbitrary type *)
module type CacheS = sig
  type key

  type 'a t

  val create : name:string -> 'a t
  (** create an empty, unbounded cache; [name] identifies it in hit/miss statistics *)

  val lookup : 'a t -> key -> 'a option
  (** return the value bound to the key, if any, and record a cache hit or miss in [Stats] *)

  val add : 'a t -> key -> 'a -> unit
  (** bind a value to the key, replacing any previous binding for that key *)

  val remove : 'a t -> key -> unit
  (** remove the binding for the key, if any *)

  val clear : 'a t -> unit
  (** empty the cache *)

  val set_lru_mode : 'a t -> lru_limit:int option -> unit
  (** empty the cache and bound its size to [lru_limit] from now on, or make it unbounded if
      [None] *)
end

module MakeCache (Key : sig
  type t [@@deriving compare, equal, hash, show, sexp]
end) : CacheS with type key = Key.t = struct
  module HQ = Hash_queue.Make (Key)

  type key = Key.t

  (* a hash-queue guarded by a mutex; most-recently-used entries live at the front, and
     [lru_limit], when set, bounds the queue's length *)
  type 'a t =
    {mutex: Error_checking_mutex.t; name: string; hq: 'a HQ.t; mutable lru_limit: int option}

  let create ~name =
    {mutex= Error_checking_mutex.create (); name; hq= HQ.create (); lru_limit= None}


  (* run [f] on the underlying hash-queue while holding the cache's mutex *)
  let with_lock t ~f = Error_checking_mutex.critical_section t.mutex ~f:(fun () -> f t.hq)

  let add t key value =
    with_lock t ~f:(fun hq ->
        (* replace any existing binding and make the key most-recently-used *)
        ignore (HQ.remove hq key) ;
        HQ.enqueue_front_exn hq key value ;
        (* in LRU mode, evict least-recently-used entries beyond the limit *)
        Option.iter t.lru_limit ~f:(fun limit ->
            let excess = HQ.length hq - limit in
            if excess > 0 then HQ.drop_back ~n:excess hq ) )


  let lookup t key =
    with_lock t ~f:(fun hq ->
        match HQ.lookup_and_move_to_front hq key with
        | Some _ as found ->
            Stats.add_cache_hit ~name:t.name ;
            found
        | None ->
            Stats.add_cache_miss ~name:t.name ;
            None )


  let clear t = with_lock t ~f:HQ.clear

  let remove t key = with_lock t ~f:(fun hq -> ignore (HQ.remove hq key))

  let set_lru_mode t ~lru_limit =
    with_lock t ~f:(fun hq ->
        t.lru_limit <- lru_limit ;
        (* drop all entries so the new bound applies from a clean slate *)
        HQ.clear hq )
end
22 changes: 22 additions & 0 deletions infer/src/base/Concurrent.mli
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,25 @@ end

(** a thread safe hashtable *)
module MakeHashtbl (H : Stdlib.Hashtbl.S) : Hashtbl with type key = H.key with module Hash = H

(** signature of a thread-safe (mutex-protected) cache from keys to values of an arbitrary type *)
module type CacheS = sig
  type key

  type 'a t

  val create : name:string -> 'a t
  (** create an empty, unbounded cache; [name] identifies it in hit/miss statistics *)

  val lookup : 'a t -> key -> 'a option
  (** return the value bound to the key, if any, and record a cache hit or miss in stats *)

  val add : 'a t -> key -> 'a -> unit
  (** bind a value to the key, replacing any previous binding for that key *)

  val remove : 'a t -> key -> unit
  (** remove the binding for the key, if any *)

  val clear : 'a t -> unit
  (** empty the cache *)

  val set_lru_mode : 'a t -> lru_limit:int option -> unit
  (** empty the cache and bound its size to [lru_limit] from now on, or make it unbounded if
      [None] *)
end

(** a thread-safe cache whose keys are [Key.t] *)
module MakeCache (Key : sig
  type t [@@deriving compare, equal, hash, show, sexp]
end) : CacheS with type key = Key.t
8 changes: 8 additions & 0 deletions infer/src/base/Config.ml
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,12 @@ and append_buck_flavors =
$(b,--buck-compilation-database) option."


and attributes_lru_max_size =
CLOpt.mk_int ~long:"attributes-lru-max-size" ~meta:"int" ~default:500
"Specify size of procedure attribute LRU cache. Relevant only to multicore mode. Defaults to \
500"


and biabduction_abs_struct =
CLOpt.mk_int ~long:"biabduction-abs-struct" ~default:1 ~meta:"int"
{|Specify abstraction level for fields of structs:
Expand Down Expand Up @@ -3895,6 +3901,8 @@ and annotation_reachability_report_source_and_sink = !annotation_reachability_re

and append_buck_flavors = RevList.to_list !append_buck_flavors

and attributes_lru_max_size = !attributes_lru_max_size

and biabduction_abs_struct = !biabduction_abs_struct

and biabduction_abs_val = !biabduction_abs_val
Expand Down
2 changes: 2 additions & 0 deletions infer/src/base/Config.mli
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ val annotation_reachability_report_source_and_sink : bool

val append_buck_flavors : string list

val attributes_lru_max_size : int

val biabduction_abs_struct : int

val biabduction_abs_val : int
Expand Down
80 changes: 79 additions & 1 deletion infer/src/base/Stats.ml
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,78 @@ module LongestProcDurationHeap = struct
include Heap
end

module CacheStats = struct
  (* hit/miss counters for one cache; mutable so that [add_hit]/[add_miss] can update in place *)
  type cache_data = {mutable hits: int; mutable misses: int}

  (* counters keyed by cache name *)
  type t = cache_data IString.Map.t

  (* sum the counters of both maps; always allocate fresh records so that the result never
     aliases the mutable records of the inputs *)
  let merge m1 m2 =
    IString.Map.merge
      (fun _name data1_opt data2_opt ->
        match (data1_opt, data2_opt) with
        | None, None ->
            None
        | Some data, None | None, Some data ->
            Some {hits= data.hits; misses= data.misses}
        | Some data1, Some data2 ->
            Some {hits= data1.hits + data2.hits; misses= data1.misses + data2.misses} )
      m1 m2


  (* return (hit rate as a rounded percentage, total number of queries); the hit rate is 0 when
     there were no queries at all, avoiding a division by zero (whose [int_of_float] result
     would be unspecified) *)
  let get_stats {hits; misses} =
    let total_queries = hits + misses in
    let hit_rate =
      if Int.equal total_queries 0 then 0
      else int_of_float @@ Float.round (float_of_int (100 * hits) /. float_of_int total_queries)
    in
    (hit_rate, total_queries)


  let pp fmt t =
    let pp_cache_data name data =
      let hit_rate, total_queries = get_stats data in
      F.fprintf fmt "cache stats: name=%s; hit rate=%d%%; total queries=%d@;" name hit_rate
        total_queries
    in
    IString.Map.iter pp_cache_data t


  (* one hit-rate entry and one total-queries entry per cache *)
  let to_log_entries ~field_name:_ t =
    IString.Map.fold
      (fun name data acc ->
        let hit_rate, total_queries = get_stats data in
        LogEntry.mk_count ~label:(F.sprintf "backend_stats.cache.%s.hit_rate" name) ~value:hit_rate
        :: LogEntry.mk_count
             ~label:(F.sprintf "backend_stats.cache.%s.total_queries" name)
             ~value:total_queries
        :: acc )
      t []


  let init = IString.Map.empty

  let add_hit t ~name =
    IString.Map.update name
      (fun data_opt ->
        let data = Option.value data_opt ~default:{hits= 0; misses= 0} in
        data.hits <- data.hits + 1 ;
        Some data )
      t


  let add_miss t ~name =
    IString.Map.update name
      (fun data_opt ->
        let data = Option.value data_opt ~default:{hits= 0; misses= 0} in
        data.misses <- data.misses + 1 ;
        Some data )
      t
end

(* NOTE: there is a custom ppx for this data structure to generate boilerplate, see
src/inferppx/StatsPpx.mli *)
type t =
{ mutable summary_file_try_load: IntCounter.t
{ mutable cache_stats: CacheStats.t
; mutable summary_file_try_load: IntCounter.t
; mutable summary_read_from_disk: IntCounter.t
; mutable summary_cache_hits: IntCounter.t
; mutable summary_cache_misses: IntCounter.t
Expand Down Expand Up @@ -255,6 +323,10 @@ let pp fmt stats =
in
F.fprintf fmt "pulse_summaries_total_disjuncts= %d@;" total
in
let pp_cache_stats fmt field =
let cache_stats : CacheStats.t = Field.get field stats in
CacheStats.pp fmt cache_stats
in
Fields.iter ~summary_file_try_load:(pp_int_field fmt) ~useful_times:(pp_time_counter_field fmt)
~longest_proc_duration_heap:(pp_longest_proc_duration_heap fmt)
~summary_read_from_disk:(pp_int_field fmt)
Expand Down Expand Up @@ -284,6 +356,7 @@ let pp fmt stats =
~restart_scheduler_total_time:(pp_time_counter_field fmt)
~spec_store_times:(pp_time_counter_field fmt) ~topl_reachable_calls:(pp_int_field fmt)
~timings:(pp_serialized_field TimingsStat.deserialize Timings.pp fmt)
~cache_stats:(pp_cache_stats fmt)


(** machine-readable printing of selected fields, for tests *)
Expand Down Expand Up @@ -494,3 +567,8 @@ let set_useful_times execution_duration =

let incr_spec_store_times counter =
update_with Fields.spec_store_times ~f:(fun t -> TimeCounter.add_duration_since t counter)


(* record a hit for the cache called [name] in the global stats *)
let add_cache_hit ~name = update_with Fields.cache_stats ~f:(fun t -> CacheStats.add_hit t ~name)

(* record a miss for the cache called [name] in the global stats *)
let add_cache_miss ~name = update_with Fields.cache_stats ~f:(fun t -> CacheStats.add_miss t ~name)
4 changes: 4 additions & 0 deletions infer/src/base/Stats.mli
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,7 @@ val get : unit -> t

val log_aggregate : t list -> unit
(** log aggregated stats to infer's log file and to stats *)

val add_cache_hit : name:string -> unit
(** record a hit for the cache called [name] *)

val add_cache_miss : name:string -> unit
(** record a miss for the cache called [name] *)

0 comments on commit 3d7d662

Please sign in to comment.