Skip to content

Commit

Permalink
Compress the compiled bytecode.
Browse files Browse the repository at this point in the history
The compression scheme is quite naive. It is based on the observation
that, in most cases, a bytecode word is a small byte followed by three nul
bytes. In that case, it is directly stored as a single byte. In the other
cases, more bytes are used, up to 5 bytes to store a full word.

This brings a 4% reduction of the overall .vo size of the standard
library, that is, a 2MB reduction.
  • Loading branch information
silene committed Apr 19, 2024
1 parent 9082a98 commit 30d1ed4
Showing 1 changed file with 69 additions and 3 deletions.
72 changes: 69 additions & 3 deletions kernel/vmemitcodes.ml
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,80 @@ let iter f s =

end

(* Most of the words of the bytecode consist of a small byte followed by three
nul bytes. The code is compressed as follows. In the common case, only that
byte is output. In the other cases (or when the byte is 253 or more), 255 is
output followed by the four original bytes, or 254 is output followed by the
first three original bytes (when the fourth is nul), or 253 is output followed
by the first two original bytes (when the last two are nul). *)

(** [compress_code src sz] packs the first [sz / 4] four-byte words of [src]
    into a variable-length encoding and returns it as a string. Each word is
    read as two little-endian 16-bit halves, [lo] (bytes 0-1) and [hi]
    (bytes 2-3), and emitted as:
    - the single byte [lo] when [hi = 0] and [lo < 253];
    - [253] followed by [lo] (2 bytes) when [hi = 0] and [lo >= 253];
    - [254] followed by [lo] (2 bytes) and [hi] (1 byte) when [0 < hi < 256];
    - [255] followed by [lo] and [hi] (2 bytes each) otherwise.
    Only whole words are encoded: the trailing [sz mod 4] bytes, if any, are
    ignored. *)
let compress_code src sz =
  let out = Buffer.create 16 in
  (* Every escaped form starts with a tag byte followed by the low half. *)
  let put_tagged tag lo =
    Buffer.add_uint8 out tag;
    Buffer.add_uint16_le out lo
  in
  let encode_word offset =
    let lo = Bytes.get_uint16_le src offset in
    let hi = Bytes.get_uint16_le src (offset + 2) in
    if hi >= 256 then begin
      (* All four bytes are significant. *)
      put_tagged 255 lo;
      Buffer.add_uint16_le out hi
    end else if hi > 0 then begin
      (* The fourth byte is nul: keep only the first three. *)
      put_tagged 254 lo;
      Buffer.add_uint8 out hi
    end else if lo >= 253 then
      (* The upper half is nul but [lo] would collide with a tag byte. *)
      put_tagged 253 lo
    else
      (* Common case: the word fits in one untagged byte. *)
      Buffer.add_uint8 out lo
  in
  let word_count = sz / 4 in
  let rec go w =
    if w < word_count then begin
      encode_word (w * 4);
      go (w + 1)
    end
  in
  go 0;
  Buffer.contents out

(** [decompress_code src] decodes a string produced by [compress_code] and
    returns the original sequence of four-byte words as fresh [bytes]. Each
    encoded word starts with a byte [c]:
    - [c < 253]: the word is [c] followed by three nul bytes;
    - [c = 253]: the next 2 bytes are the low half, the high half is nul;
    - [c = 254]: the next 2 bytes are the low half, the following byte is the
      third byte of the word, and the fourth byte is nul;
    - [c = 255]: the next 4 bytes are the word verbatim.
    @raise Invalid_argument if [src] ends in the middle of a multi-byte
    encoding (the out-of-bounds read performed by [String.get_*]). *)
let decompress_code src =
  let buf = Buffer.create 16 in
  let sz = String.length src in
  let i = ref 0 in
  while !i < sz do
    (* [pos] is the position of the tag byte; payload offsets are relative
       to it, so they do not depend on when [i] is advanced. *)
    let pos = !i in
    let c = String.get_uint8 src pos in
    let c01, c23 =
      if c < 253 then begin
        i := pos + 1;
        (c, 0)
      end else if c = 253 then begin
        i := pos + 3;
        (String.get_uint16_le src (pos + 1), 0)
      end else if c = 254 then begin
        i := pos + 4;
        (String.get_uint16_le src (pos + 1), String.get_uint8 src (pos + 3))
      end else begin
        i := pos + 5;
        (* The high half is read unsigned, like every other field, so the
           value handed to [Buffer.add_uint16_le] is always in [0, 65535]
           instead of relying on the buffer truncating a negative int. *)
        (String.get_uint16_le src (pos + 1), String.get_uint16_le src (pos + 3))
      end
    in
    Buffer.add_uint16_le buf c01;
    Buffer.add_uint16_le buf c23
  done;
  Buffer.to_bytes buf


(** This data type is stored in vo files.
    NOTE(review): changing its shape presumably changes the on-disk format of
    vo files — confirm before editing. *)
type patches = {
(* Relocation entries. Presumably these are the entries selected by the ids
   embedded in the bytecode when it is patched — TODO confirm against
   [patch_int]. *)
reloc_infos : reloc_info array;
}

let patch_int buff reloc positions =
let buff = Bytes.of_string buff in
let buff = decompress_code buff in
let iter pos =
let id = Bytes.get_int32_le buff pos in
let reloc = reloc.(Int32.to_int id) in
Expand Down Expand Up @@ -513,8 +580,7 @@ let to_memory fv code =
reloc_info = RelocTable.create 91;
} in
emit env code [];
(** Later uses of this string are all purely functional *)
let code = Bytes.sub_string env.out_buffer 0 env.out_position in
let code = compress_code env.out_buffer env.out_position in
let code = CString.hcons code in
let fold reloc id accu = (id, reloc) :: accu in
let reloc = RelocTable.fold fold env.reloc_info [] in
Expand Down

0 comments on commit 30d1ed4

Please sign in to comment.