Skip to content

Commit

Permalink
Compress the compiled bytecode.
Browse files Browse the repository at this point in the history
The compression scheme is quite naive. It is based on the observation
that, in most cases, a bytecode word is a small byte followed by three nul
bytes. In that case, it is directly stored as a single byte. In the other
cases, more bytes are used, up to 5 bytes to store a full word.

This brings a 4% reduction of the overall .vo size of the standard
library, that is, a 2MB reduction.
  • Loading branch information
silene committed Apr 20, 2024
1 parent 1ea4a28 commit 3d66700
Showing 1 changed file with 73 additions and 3 deletions.
76 changes: 73 additions & 3 deletions kernel/vmemitcodes.ml
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,84 @@ let iter f s =

end

(* Most of the words of the bytecode are comprised of a byte followed by three
nul bytes. It is compressed as follows. In the common case (the byte is less
than 252), only that byte is output. In the other cases, a marker byte is
output first: 255 is followed by the four original bytes; 254 is followed by
the first three original bytes (the fourth being nul); 253 is followed by the
first two original bytes (the last two being nul); 252 is followed by the
first original byte only (the other three being 0xff). *)

(** [compress_code src sz] encodes the first [sz / 4] little-endian 32-bit
    words of [src] into a variable-length string (1 to 5 bytes per word).
    Bytes located after the last complete word are ignored.

    Each word is split into a low half [lo] (bytes 0-1) and a high half [hi]
    (bytes 2-3), and is written as:
    - [lo] alone (one byte) when [hi = 0] and [lo < 252];
    - [253] then [lo] on two bytes when [hi = 0];
    - [252] then the low byte of [lo] when [hi = 0xffff] and [lo >= 0xff00];
    - [254] then [lo] on two bytes and [hi] on one byte when [hi <= 0xff];
    - [255] then [lo] and [hi] on two bytes each otherwise.

    @raise Invalid_argument if [src] holds fewer than [sz / 4] full words. *)
let compress_code src sz =
  let out = Buffer.create (sz * 3 / 8) in
  (* Append the encoding of a single word given as its two 16-bit halves. *)
  let emit_word lo hi =
    if hi = 0 && lo < 252 then
      Buffer.add_uint8 out lo
    else if hi = 0 then begin
      Buffer.add_uint8 out 253;
      Buffer.add_uint16_le out lo
    end else if hi = 0xffff && lo >= 0xff00 then begin
      Buffer.add_uint8 out 252;
      (* The high byte of [lo] is known to be 0xff; keep the low byte only. *)
      Buffer.add_uint8 out (lo land 0xff)
    end else if hi <= 0xff then begin
      Buffer.add_uint8 out 254;
      Buffer.add_uint16_le out lo;
      Buffer.add_uint8 out hi
    end else begin
      Buffer.add_uint8 out 255;
      Buffer.add_uint16_le out lo;
      Buffer.add_uint16_le out hi
    end
  in
  let word_count = sz / 4 in
  let rec pack w =
    if w < word_count then begin
      let off = 4 * w in
      let lo = Bytes.get_uint16_le src off in
      let hi = Bytes.get_uint16_le src (off + 2) in
      emit_word lo hi;
      pack (w + 1)
    end
  in
  pack 0;
  Buffer.contents out

(** [decompress_code src] is the inverse of [compress_code]: it expands the
    variable-length encoding [src] back into a sequence of little-endian
    32-bit words, returned as fresh bytes.

    The first byte of each encoded word selects its layout:
    - [0..251]: the word is that byte followed by three nul bytes;
    - [252]: one byte [b] follows; the word is [b], [0xff], [0xff], [0xff];
    - [253]: two bytes follow; the last two bytes of the word are nul;
    - [254]: three bytes follow; the last byte of the word is nul;
    - [255]: the four bytes of the word follow verbatim.

    @raise Invalid_argument if [src] ends in the middle of an encoded word. *)
let decompress_code src =
  let sz = String.length src in
  let buf = Buffer.create (sz * 4) in
  (* TODO: read from the string directly ([String.get_uint16_le] and friends)
     once the minimal version of OCaml is 4.13. Until then the string is
     viewed as bytes; the view is only ever read, never written. *)
  let src = Bytes.unsafe_of_string src in
  let i = ref 0 in
  while !i < sz do
    let pos = !i in
    (* [c01] and [c23] are the low and high 16-bit halves of the word, both
       unsigned; [len] is the number of encoded bytes consumed. *)
    let c01, c23, len =
      match Bytes.get src pos with
      | '\000' .. '\251' as c ->
        (Char.code c, 0, 1)
      | '\252' ->
        (0xff00 + Bytes.get_uint8 src (pos + 1), 0xffff, 2)
      | '\253' ->
        (Bytes.get_uint16_le src (pos + 1), 0, 3)
      | '\254' ->
        (Bytes.get_uint16_le src (pos + 1), Bytes.get_uint8 src (pos + 3), 4)
      | '\255' ->
        (* Both halves are read unsigned, matching how they are written. *)
        (Bytes.get_uint16_le src (pos + 1), Bytes.get_uint16_le src (pos + 3), 5)
    in
    i := pos + len;
    Buffer.add_uint16_le buf c01;
    Buffer.add_uint16_le buf c23
  done;
  Buffer.to_bytes buf


(** This data type is stored in vo files. *)
type patches = {
(* Array of [reloc_info] entries. NOTE(review): presumably indexed by the
   identifiers that [patch_int] reads from the bytecode; the code that builds
   this record is not visible here, so confirm before relying on it. *)
reloc_infos : reloc_info array;
}

let patch_int buff reloc positions =
let buff = Bytes.of_string buff in
let buff = decompress_code buff in
let iter pos =
let id = Bytes.get_int32_le buff pos in
let reloc = reloc.(Int32.to_int id) in
Expand Down Expand Up @@ -513,8 +584,7 @@ let to_memory fv code =
reloc_info = RelocTable.create 91;
} in
emit env code [];
(** Later uses of this string are all purely functional *)
let code = Bytes.sub_string env.out_buffer 0 env.out_position in
let code = compress_code env.out_buffer env.out_position in
let code = CString.hcons code in
let fold reloc id accu = (id, reloc) :: accu in
let reloc = RelocTable.fold fold env.reloc_info [] in
Expand Down

0 comments on commit 3d66700

Please sign in to comment.