Skip to content

Commit

Permalink
[multicore] configurable caching policy for attributes
Browse files Browse the repository at this point in the history
Summary:
Caches are treated differently in process mode and in multicore mode:
In process mode, the caches are cleaned after every target, to avoid taking too much memory.
In multicore mode, the caches are shared, and therefore cleaning them reduces the benefit.

This diff makes the procedure attributes cache behave as before in process mode, but act as a bounded LRU cache in multicore mode.
We also add code to log hit rate statistics in a uniform manner for caches.
The default maximum size of the LRU cache in multicore mode (here, 500) was computed as the (rough) minimum that would guarantee the same hit rate in multicore mode as the (aggregate) hit rate in process mode, for selected code bases.

Reviewed By: davidpichardie

Differential Revision:
D69409519

Privacy Context Container: L1208441

fbshipit-source-id: 2d17f7386098ae33183f402f353cfea7ca9b520b
  • Loading branch information
ngorogiannis authored and facebook-github-bot committed Feb 12, 2025
1 parent 9b2c69e commit 3d7d662
Show file tree
Hide file tree
Showing 10 changed files with 193 additions and 7 deletions.
4 changes: 4 additions & 0 deletions infer/man/man1/infer-full.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2425,6 +2425,10 @@ INTERNAL OPTIONS
--append-buck-flavors-reset
Set --append-buck-flavors to the empty list.

--attributes-lru-max-size int
Specify size of procedure attribute LRU cache. Relevant only to
multicore mode. Defaults to 500

--backtrack-level int
Maximum level of backtracking to convert an absolute path to path
relative to the common prefix between the project root and the
Expand Down
13 changes: 8 additions & 5 deletions infer/src/IR/Attributes.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,21 @@ let find =
(run_query select_statement_adb)


module Cache = Concurrent.MakeMap (Procname.Map)
module Cache = Concurrent.MakeCache (struct
type t = Procname.t [@@deriving compare, equal, hash, show, sexp]
end)

let load, clear_cache, store =
let load, clear_cache, store, set_lru_limit =
(* capture DB attribute cache: only keeps positive entries as analysis may add entries *)
let cache : ProcAttributes.t Cache.t = Cache.empty () in
let cache : ProcAttributes.t Cache.t = Cache.create ~name:"attributes" in
let load_from_uid uid =
let result = find uid in
Option.iter result ~f:(fun attrs -> Cache.add cache (ProcAttributes.get_proc_name attrs) attrs) ;
result
in
let load pname =
Dependencies.record_pname_dep Other pname ;
match Cache.find_opt cache pname with
match Cache.lookup cache pname with
| Some _ as result ->
result
| None -> (
Expand Down Expand Up @@ -72,7 +74,8 @@ let load, clear_cache, store =
DBWriter.replace_attributes ~proc_uid ~proc_attributes ~cfg ~callees ~analysis ;
Cache.remove cache pname
in
(load, clear_cache, store)
let set_lru_limit ~lru_limit = Cache.set_lru_mode cache ~lru_limit in
(load, clear_cache, store, set_lru_limit)


let load_exn pname = Option.value_exn (load pname)
Expand Down
2 changes: 2 additions & 0 deletions infer/src/IR/Attributes.mli
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,5 @@ val load_formal_types : Procname.t -> Typ.t list

val clear_cache : unit -> unit
(** clear attribute cache *)

val set_lru_limit : lru_limit:int option -> unit
(** empty the attribute cache and bound its size to [lru_limit] from now on, or make it unbounded
    if [None] *)
3 changes: 2 additions & 1 deletion infer/src/backend/InferAnalyze.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ open TaskSchedulerTypes
let clear_caches () =
Summary.OnDisk.clear_cache () ;
BufferOverrunUtils.clear_cache () ;
Attributes.clear_cache () ;
if not Config.multicore then Attributes.clear_cache () ;
Dependencies.clear ()


Expand Down Expand Up @@ -159,6 +159,7 @@ let analyze replay_call_graph source_files_to_analyze =
, [MissingDependencies.get ()] ) )
else if Config.multicore then (
let pre_analysis_gc_stats = GCStats.get ~since:ProgramStart in
Attributes.set_lru_limit ~lru_limit:(Some Config.attributes_lru_max_size) ;
DomainPool.create ~jobs:Config.jobs ~f:analyze_target ~child_prologue:ignore
~child_epilogue:ignore ~tasks:(fun () ->
tasks_generator_builder_for replay_call_graph (Lazy.force source_files_to_analyze) )
Expand Down
62 changes: 62 additions & 0 deletions infer/src/base/Concurrent.ml
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,65 @@ struct

let wrap_hashtable hash = {mutex= Error_checking_mutex.create (); hash}
end

(** signature of a thread-safe (mutex-protected) cache from keys to values of an arbitrary type *)
module type CacheS = sig
  type key

  type 'a t

  val create : name:string -> 'a t
  (** create an empty, unbounded cache; [name] identifies it in hit/miss statistics *)

  val lookup : 'a t -> key -> 'a option
  (** return the value bound to the key, if any, and record a cache hit or miss in [Stats] *)

  val add : 'a t -> key -> 'a -> unit
  (** bind a value to the key, replacing any previous binding for that key *)

  val remove : 'a t -> key -> unit
  (** remove the binding for the key, if any *)

  val clear : 'a t -> unit
  (** empty the cache *)

  val set_lru_mode : 'a t -> lru_limit:int option -> unit
  (** empty the cache and bound its size to [lru_limit] from now on, or make it unbounded if
      [None] *)
end

module MakeCache (Key : sig
  type t [@@deriving compare, equal, hash, show, sexp]
end) : CacheS with type key = Key.t = struct
  module HQ = Hash_queue.Make (Key)

  type key = Key.t

  (* a hash-queue guarded by a mutex; most-recently-used entries live at the front, and
     [lru_limit], when set, bounds the queue's length *)
  type 'a t =
    {mutex: Error_checking_mutex.t; name: string; hq: 'a HQ.t; mutable lru_limit: int option}

  let create ~name =
    {mutex= Error_checking_mutex.create (); name; hq= HQ.create (); lru_limit= None}


  (* run [f] on the underlying hash-queue while holding the cache's mutex *)
  let with_lock t ~f = Error_checking_mutex.critical_section t.mutex ~f:(fun () -> f t.hq)

  let add t key value =
    with_lock t ~f:(fun hq ->
        (* replace any existing binding and make the key most-recently-used *)
        ignore (HQ.remove hq key) ;
        HQ.enqueue_front_exn hq key value ;
        (* in LRU mode, evict least-recently-used entries beyond the limit *)
        Option.iter t.lru_limit ~f:(fun limit ->
            let excess = HQ.length hq - limit in
            if excess > 0 then HQ.drop_back ~n:excess hq ) )


  let lookup t key =
    with_lock t ~f:(fun hq ->
        match HQ.lookup_and_move_to_front hq key with
        | Some _ as found ->
            Stats.add_cache_hit ~name:t.name ;
            found
        | None ->
            Stats.add_cache_miss ~name:t.name ;
            None )


  let clear t = with_lock t ~f:HQ.clear

  let remove t key = with_lock t ~f:(fun hq -> ignore (HQ.remove hq key))

  let set_lru_mode t ~lru_limit =
    with_lock t ~f:(fun hq ->
        t.lru_limit <- lru_limit ;
        (* drop all entries so the new bound applies from a clean slate *)
        HQ.clear hq )
end
22 changes: 22 additions & 0 deletions infer/src/base/Concurrent.mli
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,25 @@ end

(** a thread safe hashtable *)
module MakeHashtbl (H : Stdlib.Hashtbl.S) : Hashtbl with type key = H.key with module Hash = H

(** signature of a thread-safe (mutex-protected) cache from keys to values of an arbitrary type *)
module type CacheS = sig
  type key

  type 'a t

  val create : name:string -> 'a t
  (** create an empty, unbounded cache; [name] identifies it in hit/miss statistics *)

  val lookup : 'a t -> key -> 'a option
  (** return the value bound to the key, if any, and record a cache hit or miss in stats *)

  val add : 'a t -> key -> 'a -> unit
  (** bind a value to the key, replacing any previous binding for that key *)

  val remove : 'a t -> key -> unit
  (** remove the binding for the key, if any *)

  val clear : 'a t -> unit
  (** empty the cache *)

  val set_lru_mode : 'a t -> lru_limit:int option -> unit
  (** empty the cache and bound its size to [lru_limit] from now on, or make it unbounded if
      [None] *)
end

(** a thread-safe cache whose keys are [Key.t] *)
module MakeCache (Key : sig
  type t [@@deriving compare, equal, hash, show, sexp]
end) : CacheS with type key = Key.t
8 changes: 8 additions & 0 deletions infer/src/base/Config.ml
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,12 @@ and append_buck_flavors =
$(b,--buck-compilation-database) option."


and attributes_lru_max_size =
CLOpt.mk_int ~long:"attributes-lru-max-size" ~meta:"int" ~default:500
"Specify size of procedure attribute LRU cache. Relevant only to multicore mode. Defaults to \
500"


and biabduction_abs_struct =
CLOpt.mk_int ~long:"biabduction-abs-struct" ~default:1 ~meta:"int"
{|Specify abstraction level for fields of structs:
Expand Down Expand Up @@ -3895,6 +3901,8 @@ and annotation_reachability_report_source_and_sink = !annotation_reachability_re

and append_buck_flavors = RevList.to_list !append_buck_flavors

and attributes_lru_max_size = !attributes_lru_max_size

and biabduction_abs_struct = !biabduction_abs_struct

and biabduction_abs_val = !biabduction_abs_val
Expand Down
2 changes: 2 additions & 0 deletions infer/src/base/Config.mli
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ val annotation_reachability_report_source_and_sink : bool

val append_buck_flavors : string list

val attributes_lru_max_size : int

val biabduction_abs_struct : int

val biabduction_abs_val : int
Expand Down
80 changes: 79 additions & 1 deletion infer/src/base/Stats.ml
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,78 @@ module LongestProcDurationHeap = struct
include Heap
end

module CacheStats = struct
  (* hit/miss counters for one cache; mutable so that [add_hit]/[add_miss] can update in place *)
  type cache_data = {mutable hits: int; mutable misses: int}

  (* counters keyed by cache name *)
  type t = cache_data IString.Map.t

  (* sum the counters of both maps; always allocate fresh records so that the result never
     aliases the mutable records of the inputs *)
  let merge m1 m2 =
    IString.Map.merge
      (fun _name data1_opt data2_opt ->
        match (data1_opt, data2_opt) with
        | None, None ->
            None
        | Some data, None | None, Some data ->
            Some {hits= data.hits; misses= data.misses}
        | Some data1, Some data2 ->
            Some {hits= data1.hits + data2.hits; misses= data1.misses + data2.misses} )
      m1 m2


  (* return (hit rate as a rounded percentage, total number of queries); the hit rate is 0 when
     there were no queries at all, avoiding a division by zero (whose [int_of_float] result
     would be unspecified) *)
  let get_stats {hits; misses} =
    let total_queries = hits + misses in
    let hit_rate =
      if Int.equal total_queries 0 then 0
      else int_of_float @@ Float.round (float_of_int (100 * hits) /. float_of_int total_queries)
    in
    (hit_rate, total_queries)


  let pp fmt t =
    let pp_cache_data name data =
      let hit_rate, total_queries = get_stats data in
      F.fprintf fmt "cache stats: name=%s; hit rate=%d%%; total queries=%d@;" name hit_rate
        total_queries
    in
    IString.Map.iter pp_cache_data t


  (* one hit-rate entry and one total-queries entry per cache *)
  let to_log_entries ~field_name:_ t =
    IString.Map.fold
      (fun name data acc ->
        let hit_rate, total_queries = get_stats data in
        LogEntry.mk_count ~label:(F.sprintf "backend_stats.cache.%s.hit_rate" name) ~value:hit_rate
        :: LogEntry.mk_count
             ~label:(F.sprintf "backend_stats.cache.%s.total_queries" name)
             ~value:total_queries
        :: acc )
      t []


  let init = IString.Map.empty

  let add_hit t ~name =
    IString.Map.update name
      (fun data_opt ->
        let data = Option.value data_opt ~default:{hits= 0; misses= 0} in
        data.hits <- data.hits + 1 ;
        Some data )
      t


  let add_miss t ~name =
    IString.Map.update name
      (fun data_opt ->
        let data = Option.value data_opt ~default:{hits= 0; misses= 0} in
        data.misses <- data.misses + 1 ;
        Some data )
      t
end

(* NOTE: there is a custom ppx for this data structure to generate boilerplate, see
src/inferppx/StatsPpx.mli *)
type t =
{ mutable summary_file_try_load: IntCounter.t
{ mutable cache_stats: CacheStats.t
; mutable summary_file_try_load: IntCounter.t
; mutable summary_read_from_disk: IntCounter.t
; mutable summary_cache_hits: IntCounter.t
; mutable summary_cache_misses: IntCounter.t
Expand Down Expand Up @@ -255,6 +323,10 @@ let pp fmt stats =
in
F.fprintf fmt "pulse_summaries_total_disjuncts= %d@;" total
in
let pp_cache_stats fmt field =
let cache_stats : CacheStats.t = Field.get field stats in
CacheStats.pp fmt cache_stats
in
Fields.iter ~summary_file_try_load:(pp_int_field fmt) ~useful_times:(pp_time_counter_field fmt)
~longest_proc_duration_heap:(pp_longest_proc_duration_heap fmt)
~summary_read_from_disk:(pp_int_field fmt)
Expand Down Expand Up @@ -284,6 +356,7 @@ let pp fmt stats =
~restart_scheduler_total_time:(pp_time_counter_field fmt)
~spec_store_times:(pp_time_counter_field fmt) ~topl_reachable_calls:(pp_int_field fmt)
~timings:(pp_serialized_field TimingsStat.deserialize Timings.pp fmt)
~cache_stats:(pp_cache_stats fmt)


(** machine-readable printing of selected fields, for tests *)
Expand Down Expand Up @@ -494,3 +567,8 @@ let set_useful_times execution_duration =

let incr_spec_store_times counter =
update_with Fields.spec_store_times ~f:(fun t -> TimeCounter.add_duration_since t counter)


(* record a hit for the cache called [name] in the global stats *)
let add_cache_hit ~name = update_with Fields.cache_stats ~f:(fun t -> CacheStats.add_hit t ~name)

(* record a miss for the cache called [name] in the global stats *)
let add_cache_miss ~name = update_with Fields.cache_stats ~f:(fun t -> CacheStats.add_miss t ~name)
4 changes: 4 additions & 0 deletions infer/src/base/Stats.mli
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,7 @@ val get : unit -> t

val log_aggregate : t list -> unit
(** log aggregated stats to infer's log file and to stats *)

val add_cache_hit : name:string -> unit
(** record a hit for the cache called [name] *)

val add_cache_miss : name:string -> unit
(** record a miss for the cache called [name] *)

0 comments on commit 3d7d662

Please sign in to comment.