-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathpdfmarks.ml
294 lines (272 loc) · 10.8 KB
/
pdfmarks.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
(* PDF Bookmarks *)
open Pdfutil
(* One bookmark (document outline entry), as produced by read_bookmarks and
consumed by add_bookmarks. The tree structure is encoded by [level]. *)
type t =
{level : int; (* nesting depth; read_bookmarks emits top-level entries at 0 *)
text : string; (* contents of the /Title entry *)
target : Pdfdest.t; (* where the bookmark points; read from /Dest or /A *)
isopen : bool; (* on read: true when /Count is a positive integer *)
colour : float * float * float; (* the /C entry; (0., 0., 0.) when absent, and not written when (0., 0., 0.) *)
flags : int} (* the /F entry; 0 when absent, and not written when 0 *)
(* Debug rendering of a bookmark: level, title, destination (as PDF syntax)
   and open flag, followed by a newline. Colour and flags are not shown. *)
let string_of_bookmark m =
  let destination =
    Pdfwrite.string_of_pdf (Pdfdest.pdfobject_of_destination m.target)
  in
  Printf.sprintf "%i %s %s %b\n" m.level m.text destination m.isopen
(* Remove the bookmarks from a document.

   The catalog is looked up through /Root in the trailer dictionary, its
   /Outlines entry is removed, and the modified catalog is added to [pdf] as a
   new object. The result is [pdf] with its root and the trailer /Root entry
   pointing at that new catalog object.

   Raises Pdf.PDFError if the trailer dictionary has no /Root. *)
let remove_bookmarks pdf =
  match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
  | None ->
      (* The message previously misspelt this function's name. *)
      raise (Pdf.PDFError "remove_bookmarks: Bad PDF: no root")
  | Some catalog ->
      let catalog' = Pdf.remove_dict_entry catalog "/Outlines" in
      let newcatalognum = Pdf.addobj pdf catalog' in
      {pdf with
         Pdf.root = newcatalognum;
         Pdf.trailerdict =
           Pdf.add_dict_entry
             pdf.Pdf.trailerdict "/Root" (Pdf.Indirect newcatalognum)}
(* An outline tree node under construction: the object number the node will be
stored under, the node dictionary built so far, the child nodes, and whether
the node will be shown open. *)
type ntree =
Br of int * Pdf.pdfobject * ntree list * bool (* bool is will_be_open *)
(*let rec print_tree (Br (i, obj, ls, isopen)) =
Printf.printf "Br (%i, %s, %b\n" i (Pdfwrite.string_of_pdf obj) isopen;
iter print_tree ls;
flprint ")\n"*)
(* Produce a new object number: bump the counter [source] and offset it from
   the highest object number currently recorded in [pdf]. *)
let fresh source pdf =
  source := !source + 1;
  pdf.Pdf.objects.Pdf.maxobjnum + !source
(* Does this forest contain at least one node marked open, at any depth? *)
let rec any_open_nodes nodes =
  List.exists
    (fun (Br (_, _, children, is_open)) -> is_open || any_open_nodes children)
    nodes
(* Number of visible nodes in a forest: every node at this level counts once,
   and the children of a node are counted only when that node is open. *)
let rec total_visible nodes =
  List.fold_left
    (fun seen (Br (_, _, children, is_open)) ->
       let below = if is_open then total_visible children else 0 in
       seen + 1 + below)
    0
    nodes
(* Flatten a forest of outline nodes into (object number, object) pairs and
   build the outline root dictionary for it. Returns the pairs, root first,
   together with the root object number.

   An empty forest yields just an empty root dictionary. Otherwise each
   top-level node is given a /Parent link to the root, and the root gets
   /First and /Last, plus /Count when any node in the forest is open. *)
let flatten_tree source pdf = function
  | [] ->
      let n = fresh source pdf in
      [(n, Pdf.Dictionary [])], n
  | tree ->
      let root_objnum = fresh source pdf in
      (* Point every top-level node back at the root. *)
      let tree =
        map
          (fun (Br (i, dict, children, isopen)) ->
             let dict' =
               Pdf.add_dict_entry dict "/Parent" (Pdf.Indirect root_objnum)
             in
             Br (i, dict', children, isopen))
          tree
      in
      (* A node followed by all of its descendants. *)
      let rec really_flatten (Br (i, pdfobject, children, _)) =
        (i, pdfobject) :: flatten (map really_flatten children)
      in
      let all_but_top = flatten (map really_flatten tree) in
      (* The root dictionary is built from the first level of the tree. *)
      let Br (first, _, _, _), Br (last, _, _, _) = extremes tree in
      let count =
        if any_open_nodes tree
        then ["/Count", (Pdf.Integer (total_visible tree))]
        else []
      in
      let links = [("/First", Pdf.Indirect first); ("/Last", Pdf.Indirect last)] in
      let top = (root_objnum, Pdf.Dictionary (links @ count)) in
      top :: all_but_top, root_objnum
(* Add /Count entries throughout an ntree forest. A node without children is
   left alone. Otherwise the magnitude is the number of visible nodes below
   it: positive when the node is open, negative when it is closed. *)
let rec add_counts l = map add_count l

and add_count (Br (i, obj, ls, isopen)) =
  let newobj =
    match ls with
    | [] -> obj
    | _ ->
        let visible = total_visible ls in
        let count = if isopen then visible else ~-visible in
        Pdf.add_dict_entry obj "/Count" (Pdf.Integer count)
  in
  Br (i, newobj, add_counts ls, isopen)
(* Give a node a /Parent entry when [parent] supplies an object number, and
   recursively give each descendant a /Parent entry naming its own parent. *)
let rec add_parent parent (Br (i, obj, children, isopen)) =
  let with_parent_link = function
    | Some parent_num ->
        Pdf.add_dict_entry obj "/Parent" (Pdf.Indirect parent_num)
    | None -> obj
  in
  let children' = map (add_parent (Some i)) children in
  Br (i, with_parent_link parent, children', isopen)
(* Add /First and /Last entries, naming the first and last child, to every
   node of an ntree that has children. Childless nodes are returned as is. *)
let rec add_firstlast (Br (i, obj, children, isopen) as node) =
  match children with
  | [] -> node
  | _ ->
      let Br (first, _, _, _), Br (last, _, _, _) = extremes children in
      let obj' =
        Pdf.add_dict_entry
          (Pdf.add_dict_entry obj "/First" (Pdf.Indirect first))
          "/Last" (Pdf.Indirect last)
      in
      Br (i, obj', map add_firstlast children, isopen)
(* Add /Next entries below a node: among each run of siblings, every node but
   the last gets a /Next entry naming the sibling that follows it. The node
   passed in is itself unchanged apart from its children. *)
let rec add_next (Br (i, obj, children, isopen)) =
  let with_next (Br (n, o, grandchildren, opn)) nextnum =
    Br (n, Pdf.add_dict_entry o "/Next" (Pdf.Indirect nextnum), grandchildren, opn)
  in
  let linked =
    match children with
    | [] | [_] -> children
    | _ :: following ->
        (* Pair each sibling except the last with its successor's number. *)
        let successor_nums = map (fun (Br (n, _, _, _)) -> n) following in
        (map2 with_next (all_but_last children) successor_nums)
        @ [last following]
  in
  Br (i, obj, map add_next linked, isopen)
(* Add /Prev entries below a node: among each run of siblings, every node but
   the first gets a /Prev entry naming the sibling that precedes it. The node
   passed in is itself unchanged apart from its children. *)
let rec add_prev (Br (i, obj, children, isopen)) =
  let with_prev (Br (n, o, grandchildren, opn)) prevnum =
    Br (n, Pdf.add_dict_entry o "/Prev" (Pdf.Indirect prevnum), grandchildren, opn)
  in
  let linked =
    match children with
    | [] | [_] -> children
    | head :: following ->
        (* Pair each sibling except the first with its predecessor's number. *)
        let predecessor_nums =
          map (fun (Br (n, _, _, _)) -> n) (all_but_last children)
        in
        head :: map2 with_prev following predecessor_nums
  in
  Br (i, obj, map add_prev linked, isopen)
(* Make an outline item dictionary from a title, a destination, a colour and
   a flags value.

   - /Title is always present.
   - The destination is written as /A when [target] is an action and as /Dest
     otherwise; nothing is written when the destination converts to null.
   - /C is written unless the colour is (0., 0., 0.).
   - /F is written unless [flags] is 0.

   Previously the colour and flags entries were appended inside the else
   branch of the null-destination test (the @ operator binds tighter than
   if/else), so a bookmark without a destination silently lost its /C and /F.
   The three groups of entries are now computed independently.

   [pdf] is unused; it is kept so existing callers need not change. *)
let node_of_line pdf title target colour flags =
  let destination_entries =
    match Pdfdest.pdfobject_of_destination target, target with
    | Pdf.Null, _ -> []
    | _, Pdfdest.Action a -> [("/A", a)]
    | dest, _ -> [("/Dest", dest)]
  in
  let colour_entries =
    match colour with
    | (0., 0., 0.) -> []
    | (r, g, b) -> [("/C", Pdf.Array [Pdf.Real r; Pdf.Real g; Pdf.Real b])]
  in
  let flags_entries =
    match flags with
    | 0 -> []
    | _ -> [("/F", Pdf.Integer flags)]
  in
  Pdf.Dictionary
    (("/Title", Pdf.String title)
     :: (destination_entries @ colour_entries @ flags_entries))
(* Make an ntree list from a list of parsed bookmark lines. The head bookmark
becomes a node; the bookmarks after it that have a strictly greater level
become its children, and what remains is processed as its siblings. Each node
is given an object number from [fresh] and its open flag from [isopen]. *)
let rec make_outline_ntree source pdf = function
| [] -> []
| h::t ->
(* NOTE(review): presumably cleavewhile returns the longest prefix satisfying
the predicate together with the remainder - confirm against Pdfutil. *)
let lower, rest = cleavewhile (fun {level = n'} -> n' > h.level) t in
(*Printf.printf "make_outline_ntree: %s\n" h.text;*)
let node = node_of_line pdf h.text h.target h.colour h.flags in
(* NOTE(review): fresh is called inside a constructor and cons expression
alongside the two recursive calls, and OCaml does not specify the evaluation
order of such subexpressions. Which number each node receives therefore
depends on the compiler; restructuring this expression may renumber nodes. *)
Br (fresh source pdf, node, make_outline_ntree source pdf lower, h.isopen)
::make_outline_ntree source pdf rest
(* Replace the bookmarks of [pdf] with those described by [parsed].

   An empty list just removes any existing bookmarks. Otherwise the outline
   tree is built and linked (/First, /Last, /Prev, /Next, /Parent, /Count),
   its objects are added to [pdf], and a new catalog whose /Outlines entry
   points at the outline root is installed as the document root.

   Raises Pdf.PDFError if the trailer dictionary has no /Root. *)
let add_bookmarks parsed pdf =
  match parsed with
  | [] -> remove_bookmarks pdf
  | _ ->
      let source = ref 0 in
      let toplevel = make_outline_ntree source pdf parsed in
      let toplevel = map add_firstlast toplevel in
      (* Wrap the top level in a dummy parent so that the sibling links are
         also added between top-level nodes, then unwrap again. *)
      let Br (_, _, toplevel, _) =
        add_next (add_prev (Br (0, Pdf.Null, toplevel, false)))
      in
      let toplevel = add_counts (map (add_parent None) toplevel) in
      let pairs, tree_root_num = flatten_tree source pdf toplevel in
      (* Store every outline object under its pre-assigned number. *)
      iter (fun pair -> ignore (Pdf.addobj_given_num pdf pair)) pairs;
      (* Install a catalog whose /Outlines entry names the new outline root. *)
      match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
      | None -> raise (Pdf.PDFError "Bad PDF: no root")
      | Some catalog ->
          let outlines_ref = Pdf.Indirect tree_root_num in
          let catalog' = Pdf.add_dict_entry catalog "/Outlines" outlines_ref in
          let newcatalognum = Pdf.addobj pdf catalog' in
          let trailerdict' =
            Pdf.add_dict_entry
              pdf.Pdf.trailerdict "/Root" (Pdf.Indirect newcatalognum)
          in
          {pdf with Pdf.root = newcatalognum; Pdf.trailerdict = trailerdict'}
(* Read bookmarks *)

(* Visit the children of [outlines], if it has a /First entry, passing each
   bookmark found to [output]. [indent_lb] holds the current nesting level. *)
let rec traverse_outlines_lb ~preserve_actions indent_lb pdf outlines output =
  match Pdf.lookup_direct pdf "/First" outlines with
  | Some first ->
      do_until_no_next_lb ~preserve_actions indent_lb pdf first output
  | None -> ()

(* Emit the bookmark for [outline], then visit its children one level deeper,
   then carry on with its /Next sibling until there is none. *)
and do_until_no_next_lb ~preserve_actions indent_lb pdf outline output =
  let lookup key = Pdf.lookup_direct pdf key outline in
  let text =
    match lookup "/Title" with
    | Some (Pdf.String s) -> s
    | _ ->
        Pdfe.log "/Title not a string or not present in document outline entry. Using the empty string.\n";
        ""
  in
  let target =
    match lookup "/Dest" with
    | Some (Pdf.String stringdest) when preserve_actions ->
        Pdfdest.NamedDestinationElsewhere stringdest
    | Some dest -> Pdfdest.read_destination pdf dest
    | None ->
        begin match lookup "/A" with
        | None -> Pdfdest.NullDestination
        | Some action ->
            (* When preserving actions, an action is still resolved if its /D
               is a direct (array) destination; only other cases are kept as
               actions. When not preserving, any /D present is resolved. *)
            begin match Pdf.lookup_direct pdf "/D" action with
            | Some ((Pdf.Array _) as dest) -> Pdfdest.read_destination pdf dest
            | Some dest when not preserve_actions ->
                Pdfdest.read_destination pdf dest
            | _ -> Pdfdest.Action (Pdf.direct pdf action)
            end
        end
  in
  let isopen =
    match lookup "/Count" with
    | Some (Pdf.Integer i) -> i > 0
    | _ -> false
  in
  let colour =
    (* Any failure while reading the colour falls back to (0., 0., 0.). *)
    try
      match lookup "/C" with
      | Some (Pdf.Array [a; b; c]) ->
          (Pdf.getnum pdf a, Pdf.getnum pdf b, Pdf.getnum pdf c)
      | _ -> (0., 0., 0.)
    with _ -> (0., 0., 0.)
  in
  let flags =
    match lookup "/F" with
    | Some (Pdf.Integer i) -> i
    | _ -> 0
  in
  output {level = !indent_lb; text; target; isopen; colour; flags};
  (* Children are one level deeper; restore the level afterwards. *)
  incr indent_lb;
  traverse_outlines_lb ~preserve_actions indent_lb pdf outline output;
  if !indent_lb > 0 then decr indent_lb;
  match lookup "/Next" with
  | Some next ->
      do_until_no_next_lb ~preserve_actions indent_lb pdf next output
  | None -> ()
(* Read the bookmarks of a document, in traversal order. Returns the empty
   list when the catalog has no /Outlines entry. [preserve_actions] (default
   false) is passed through to the outline traversal.

   Raises Pdf.PDFError if the trailer dictionary has no /Root. *)
let read_bookmarks ?(preserve_actions=false) pdf =
  let catalog =
    match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
    | Some c -> c
    | None -> raise (Pdf.PDFError "read_bookmarks - Bad PDF: no root")
  in
  match Pdf.lookup_direct pdf "/Outlines" catalog with
  | None -> []
  | Some outlines ->
      (* Bookmarks are accumulated newest-first and reversed at the end. *)
      let collected = ref [] in
      let record b = collected := b :: !collected in
      traverse_outlines_lb ~preserve_actions (ref 0) pdf outlines record;
      rev !collected
(* Apply the transformation [tr] to the destination of a bookmark, leaving
   every other field as it was. *)
let transform_bookmark tr m =
  let target = Pdfdest.transform_destination tr m.target in
  {m with target}