-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathpdfpagelabels.ml
220 lines (200 loc) · 6.85 KB
/
pdfpagelabels.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
(* Page Labels. 1-based. All functions assume input lists of page labels are
well-formed. *)
open Pdfutil
type labelstyle =
| DecimalArabic
| UppercaseRoman
| LowercaseRoman
| UppercaseLetters
| LowercaseLetters
| NoLabelPrefixOnly
type t =
{labelstyle : labelstyle;
labelprefix : string option;
startpage : int;
startvalue : int}
let string_of_labelstyle = function
| DecimalArabic -> "DecimalArabic"
| UppercaseRoman -> "UppercaseRoman"
| LowercaseRoman -> "LowercaseRoman"
| UppercaseLetters -> "UppercaseLetters"
| LowercaseLetters -> "LowercaseLetters"
| NoLabelPrefixOnly -> "NoLabelPrefixOnly"
let string_of_pagelabel l =
(Printf.sprintf "labelstyle: %s\n" (string_of_labelstyle l.labelstyle)) ^
(Printf.sprintf "labelprefix: %s\n"
(match l.labelprefix with None -> "None" | Some s -> s)) ^
(Printf.sprintf "startpage: %i\n" l.startpage) ^
(Printf.sprintf "startvalue: %s\n" (string_of_int l.startvalue))
let label_of_range pdf (startpage, thing) =
let startpage =
match startpage with
| Pdf.Integer i -> i + 1
| _ -> Pdfe.log "Bad Number Tree\n"; 1
and labelstyle =
match Pdf.lookup_direct pdf "/S" thing with
| Some (Pdf.Name "/D") -> DecimalArabic
| Some (Pdf.Name "/R") -> UppercaseRoman
| Some (Pdf.Name "/r") -> LowercaseRoman
| Some (Pdf.Name "/A") -> UppercaseLetters
| Some (Pdf.Name "/a") -> LowercaseLetters
| _ -> NoLabelPrefixOnly
and labelprefix =
match Pdf.lookup_direct pdf "/P" thing with
| Some (Pdf.String s) -> Some s
| _ -> None
and startvalue =
match Pdf.lookup_direct pdf "/St" thing with
| Some (Pdf.Integer i) -> i
| _ -> 1
in
{labelstyle = labelstyle;
labelprefix = labelprefix;
startpage = startpage;
startvalue = startvalue}
let read pdf =
match
Pdf.lookup_direct pdf "/PageLabels" (Pdf.lookup_obj pdf pdf.Pdf.root)
with
| None -> []
| Some labeltree ->
let labelranges = Pdf.contents_of_nametree pdf labeltree in
map (label_of_range pdf) labelranges
(** Add a label, rearranging existing labels. *)
let add_label endpage ls l e =
let beforeorduringorequal, after =
List.partition (function x -> x.startpage <= e) ls
in
let beforeorduring =
lose (function x -> x.startpage = l.startpage) beforeorduringorequal
in
let replica =
match after with
| _ when e = endpage -> []
| x::_ when x.startpage = e + 1 -> []
| _ ->
match beforeorduringorequal with [] -> [] | _ ->
let lst = last beforeorduringorequal in
[{lst with
startpage = e + 1;
startvalue = e + 1 + (lst.startvalue - lst.startpage)}]
and before =
lose
(function x -> x.startpage > l.startpage && x.startpage <= e)
beforeorduring
in
before @ [l] @ replica @ after
let basic =
{labelstyle = DecimalArabic;
labelprefix = None;
startpage = 1;
startvalue = 1}
(** Make a complete set, so that each page has a number *)
let complete = function
| [] -> [basic]
| x::xs when x.startpage > 1 -> basic::x::xs
| ls -> ls
let letter_string n =
implode (many (char_of_int ((n - 1) mod 26 + 65)) (((n - 1) / 26) + 1))
(* Make a page label string *)
let string_of_pagenumber n = function
| NoLabelPrefixOnly -> ""
| DecimalArabic -> string_of_int n
| UppercaseRoman -> roman_upper n
| LowercaseRoman -> roman_lower n
| UppercaseLetters -> letter_string n
| LowercaseLetters -> String.lowercase_ascii (letter_string n)
let pagelabeltext_of_single n l =
let realnumber =
n - (l.startpage - l.startvalue)
in
begin match l.labelprefix with None -> "" | Some s -> s end ^
string_of_pagenumber realnumber l.labelstyle
let rec pagelabeltext_of_pagenumber n = function
| [] -> raise Not_found
| [x] -> pagelabeltext_of_single n x
| x::y::_ when n < y.startpage -> pagelabeltext_of_single n x
| _::r -> pagelabeltext_of_pagenumber n r
(* Just make a page label for a single one *)
let pagelabel_of_single n l =
let realnumber =
n - (l.startpage - l.startvalue)
in
{l with startpage = realnumber; startvalue = realnumber}
let rec pagelabel_of_pagenumber n = function
| [] -> raise Not_found
| [x] -> pagelabel_of_single n x
| x::y::_ when n < y.startpage -> pagelabel_of_single n x
| _::r -> pagelabel_of_pagenumber n r
let rec coalesce prev = function
| [] -> rev prev
| [x] -> rev (x::prev)
| x::y::r
when x.labelstyle = y.labelstyle &&
x.labelprefix = y.labelprefix &&
y.startpage - y.startvalue = x.startpage - x.startvalue ->
coalesce prev (x::r)
| x::r -> coalesce (x::prev) r
let coalesce ls = coalesce [] ls
(* Merging page labels. Requires that pdfs, ranges non-empty and same length. *)
let merge_pagelabels pdfs ranges =
let completed = map complete (map read pdfs) in
let new_labels =
map2
(fun labels range ->
map (fun p -> pagelabel_of_pagenumber p labels) range)
completed
ranges
in
let change_labels ls =
map2
(fun l sp -> {l with startpage = sp})
ls
(indx (flatten ranges))
in
coalesce (change_labels (flatten new_labels))
let remove pdf =
let root = Pdf.lookup_obj pdf pdf.Pdf.root in
let rootnum =
Pdf.addobj pdf (Pdf.remove_dict_entry root "/PageLabels")
in
pdf.Pdf.root <- rootnum;
pdf.Pdf.trailerdict <-
Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum)
(* For now, just a flat number tree. Doesn't check ranges are well-formed (i.e
contiguous / nonoverlapping) *)
let write pdf labels =
if labels = [] then remove pdf else
let arr =
flatten
(map
(function label ->
[Pdf.Integer (label.startpage - 1);
Pdf.Dictionary
((match label.labelstyle with
| NoLabelPrefixOnly -> []
| DecimalArabic -> [("/S", Pdf.Name "/D")]
| UppercaseRoman -> [("/S", Pdf.Name "/R")]
| LowercaseRoman -> [("/S", Pdf.Name "/r")]
| UppercaseLetters -> [("/S", Pdf.Name "/A")]
| LowercaseLetters -> [("/S", Pdf.Name "/a")])
@
(match label.labelprefix with
| None -> []
| Some p -> [("/P", Pdf.String p)])
@
(match label.startvalue with
| 1 -> []
| s -> [("/St", Pdf.Integer s)]))])
labels)
in
let root = Pdf.lookup_obj pdf pdf.Pdf.root in
let rootnum =
Pdf.addobj pdf
(Pdf.add_dict_entry
root "/PageLabels" (Pdf.Dictionary ["/Nums", Pdf.Array arr]))
in
pdf.Pdf.root <- rootnum;
pdf.Pdf.trailerdict <-
Pdf.add_dict_entry
pdf.Pdf.trailerdict "/Root" (Pdf.Indirect rootnum)