-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nodes.py
2222 lines (1771 loc) · 76.1 KB
/
nodes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# $Id: nodes.py 8246 2019-02-12 17:56:51Z milde $
# Author: David Goodger <goodger@python.org>
# Maintainer: docutils-develop@lists.sourceforge.net
# Copyright: This module has been placed in the public domain.
"""
Docutils document tree element class library.
Classes in CamelCase are abstract base classes or auxiliary classes. The one
exception is `Text`, for a text (PCDATA) node; uppercase is used to
differentiate from element classes. Classes in lower_case_with_underscores
are element classes, matching the XML element generic identifiers in the DTD_.
The position of each node (the level at which it can occur) is significant and
is represented by abstract base classes (`Root`, `Structural`, `Body`,
`Inline`, etc.). Certain transformations will be easier because we can use
``isinstance(node, base_class)`` to determine the position of the node in the
hierarchy.
.. _DTD: http://docutils.sourceforge.net/docs/ref/docutils.dtd
"""
__docformat__ = 'reStructuredText'
import sys
import os
import re
import warnings
import types
import unicodedata
import docutils.utils
# ==============================
# Functional Node Base Classes
# ==============================
class Node(object):
"""Abstract base class of nodes in a document tree."""
parent = None
"""Back-reference to the Node immediately containing this Node."""
document = None
"""The `document` node at the root of the tree containing this Node."""
source = None
"""Path or description of the input source which generated this Node."""
line = None
"""The line number (1-based) of the beginning of this Node in `source`."""
def __nonzero__(self):
"""
Node instances are always true, even if they're empty. A node is more
than a simple container. Its boolean "truth" does not depend on
having one or more subnodes in the doctree.
Use `len()` to check node length. Use `None` to represent a boolean
false value.
"""
return True
if sys.version_info < (3,):
# on 2.x, str(node) will be a byte string with Unicode
# characters > 255 escaped; on 3.x this is no longer necessary
def __str__(self):
return unicode(self).encode('raw_unicode_escape')
def asdom(self, dom=None):
"""Return a DOM **fragment** representation of this Node."""
if dom is None:
import xml.dom.minidom as dom
domroot = dom.Document()
return self._dom_node(domroot)
def pformat(self, indent=' ', level=0):
"""
Return an indented pseudo-XML representation, for test purposes.
Override in subclasses.
"""
raise NotImplementedError
def copy(self):
"""Return a copy of self."""
raise NotImplementedError
def deepcopy(self):
"""Return a deep copy of self (also copying children)."""
raise NotImplementedError
def astext(self):
"""Return a string representation of this Node."""
raise NotImplementedError
def setup_child(self, child):
child.parent = self
if self.document:
child.document = self.document
if child.source is None:
child.source = self.document.current_source
if child.line is None:
child.line = self.document.current_line
def walk(self, visitor):
"""
Traverse a tree of `Node` objects, calling the
`dispatch_visit()` method of `visitor` when entering each
node. (The `walkabout()` method is similar, except it also
calls the `dispatch_departure()` method before exiting each
node.)
This tree traversal supports limited in-place tree
modifications. Replacing one node with one or more nodes is
OK, as is removing an element. However, if the node removed
or replaced occurs after the current node, the old node will
still be traversed, and any new nodes will not.
Within ``visit`` methods (and ``depart`` methods for
`walkabout()`), `TreePruningException` subclasses may be raised
(`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).
Parameter `visitor`: A `NodeVisitor` object, containing a
``visit`` implementation for each `Node` subclass encountered.
Return true if we should stop the traversal.
"""
stop = False
visitor.document.reporter.debug(
'docutils.nodes.Node.walk calling dispatch_visit for %s'
% self.__class__.__name__)
try:
try:
visitor.dispatch_visit(self)
except (SkipChildren, SkipNode):
return stop
except SkipDeparture: # not applicable; ignore
pass
children = self.children
try:
for child in children[:]:
if child.walk(visitor):
stop = True
break
except SkipSiblings:
pass
except StopTraversal:
stop = True
return stop
def walkabout(self, visitor):
"""
Perform a tree traversal similarly to `Node.walk()` (which
see), except also call the `dispatch_departure()` method
before exiting each node.
Parameter `visitor`: A `NodeVisitor` object, containing a
``visit`` and ``depart`` implementation for each `Node`
subclass encountered.
Return true if we should stop the traversal.
"""
call_depart = True
stop = False
visitor.document.reporter.debug(
'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
% self.__class__.__name__)
try:
try:
visitor.dispatch_visit(self)
except SkipNode:
return stop
except SkipDeparture:
call_depart = False
children = self.children
try:
for child in children[:]:
if child.walkabout(visitor):
stop = True
break
except SkipSiblings:
pass
except SkipChildren:
pass
except StopTraversal:
stop = True
if call_depart:
visitor.document.reporter.debug(
'docutils.nodes.Node.walkabout calling dispatch_departure '
'for %s' % self.__class__.__name__)
visitor.dispatch_departure(self)
return stop
def _fast_traverse(self, cls):
"""Specialized traverse() that only supports instance checks."""
result = []
if isinstance(self, cls):
result.append(self)
for child in self.children:
result.extend(child._fast_traverse(cls))
return result
def _all_traverse(self):
"""Specialized traverse() that doesn't check for a condition."""
result = []
result.append(self)
for child in self.children:
result.extend(child._all_traverse())
return result
def traverse(self, condition=None, include_self=True, descend=True,
siblings=False, ascend=False):
"""
Return an iterable containing
* self (if include_self is true)
* all descendants in tree traversal order (if descend is true)
* all siblings (if siblings is true) and their descendants (if
also descend is true)
* the siblings of the parent (if ascend is true) and their
descendants (if also descend is true), and so on
If `condition` is not None, the iterable contains only nodes
for which ``condition(node)`` is true. If `condition` is a
node class ``cls``, it is equivalent to a function consisting
of ``return isinstance(node, cls)``.
If ascend is true, assume siblings to be true as well.
For example, given the following tree::
<paragraph>
<emphasis> <--- emphasis.traverse() and
<strong> <--- strong.traverse() are called.
Foo
Bar
<reference name="Baz" refid="baz">
Baz
Then list(emphasis.traverse()) equals ::
[<emphasis>, <strong>, <#text: Foo>, <#text: Bar>]
and list(strong.traverse(ascend=True)) equals ::
[<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
"""
if ascend:
siblings=True
# Check for special argument combinations that allow using an
# optimized version of traverse()
if include_self and descend and not siblings:
if condition is None:
return self._all_traverse()
elif isinstance(condition, (types.ClassType, type)):
return self._fast_traverse(condition)
# Check if `condition` is a class (check for TypeType for Python
# implementations that use only new-style classes, like PyPy).
if isinstance(condition, (types.ClassType, type)):
node_class = condition
def condition(node, node_class=node_class):
return isinstance(node, node_class)
r = []
if include_self and (condition is None or condition(self)):
r.append(self)
if descend and len(self.children):
for child in self:
r.extend(child.traverse(include_self=True, descend=True,
siblings=False, ascend=False,
condition=condition))
if siblings or ascend:
node = self
while node.parent:
index = node.parent.index(node)
for sibling in node.parent[index+1:]:
r.extend(sibling.traverse(include_self=True,
descend=descend,
siblings=False, ascend=False,
condition=condition))
if not ascend:
break
else:
node = node.parent
return r
def next_node(self, condition=None, include_self=False, descend=True,
siblings=False, ascend=False):
"""
Return the first node in the iterable returned by traverse(),
or None if the iterable is empty.
Parameter list is the same as of traverse. Note that
include_self defaults to 0, though.
"""
iterable = self.traverse(condition=condition,
include_self=include_self, descend=descend,
siblings=siblings, ascend=ascend)
try:
return iterable[0]
except IndexError:
return None
if sys.version_info < (3,):
class reprunicode(unicode):
"""
A unicode sub-class that removes the initial u from unicode's repr.
"""
def __repr__(self):
return unicode.__repr__(self)[1:]
else:
reprunicode = unicode
def ensure_str(s):
"""
Failsave conversion of `unicode` to `str`.
"""
if sys.version_info < (3,) and isinstance(s, unicode):
return s.encode('ascii', 'backslashreplace')
return s
class Text(Node, reprunicode):
"""
Instances are terminal nodes (leaves) containing text only; no child
nodes or attributes. Initialize by passing a string to the constructor.
Access the text itself with the `astext` method.
"""
tagname = '#text'
children = ()
"""Text nodes have no children, and cannot have children."""
if sys.version_info > (3,):
def __new__(cls, data, rawsource=None):
"""Prevent the rawsource argument from propagating to str."""
if isinstance(data, bytes):
raise TypeError('expecting str data, not bytes')
return reprunicode.__new__(cls, data)
else:
def __new__(cls, data, rawsource=None):
"""Prevent the rawsource argument from propagating to str."""
return reprunicode.__new__(cls, data)
def __init__(self, data, rawsource=''):
self.rawsource = rawsource
"""The raw text from which this element was constructed."""
def shortrepr(self, maxlen=18):
data = self
if len(data) > maxlen:
data = data[:maxlen-4] + ' ...'
return '<%s: %r>' % (self.tagname, reprunicode(data))
def __repr__(self):
return self.shortrepr(maxlen=68)
def _dom_node(self, domroot):
return domroot.createTextNode(unicode(self))
def astext(self):
return reprunicode(docutils.utils.unescape(self))
# Note about __unicode__: The implementation of __unicode__ here,
# and the one raising NotImplemented in the superclass Node had
# to be removed when changing Text to a subclass of unicode instead
# of UserString, since there is no way to delegate the __unicode__
# call to the superclass unicode:
# unicode itself does not have __unicode__ method to delegate to
# and calling unicode(self) or unicode.__new__ directly creates
# an infinite loop
def copy(self):
return self.__class__(reprunicode(self), rawsource=self.rawsource)
def deepcopy(self):
return self.copy()
def pformat(self, indent=' ', level=0):
indent = indent * level
lines = [indent+line for line in self.astext().splitlines()]
if not lines:
return ''
return '\n'.join(lines) + '\n'
# rstrip and lstrip are used by substitution definitions where
# they are expected to return a Text instance, this was formerly
# taken care of by UserString.
def rstrip(self, chars=None):
return self.__class__(reprunicode.rstrip(self, chars), self.rawsource)
def lstrip(self, chars=None):
return self.__class__(reprunicode.lstrip(self, chars), self.rawsource)
class Element(Node):
"""
`Element` is the superclass to all specific elements.
Elements contain attributes and child nodes. Elements emulate
dictionaries for attributes, indexing by attribute name (a string). To
set the attribute 'att' to 'value', do::
element['att'] = 'value'
There are two special attributes: 'ids' and 'names'. Both are
lists of unique identifiers, and names serve as human interfaces
to IDs. Names are case- and whitespace-normalized (see the
fully_normalize_name() function), and IDs conform to the regular
expression ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function).
Elements also emulate lists for child nodes (element nodes and/or text
nodes), indexing by integer. To get the first child node, use::
element[0]
Elements may be constructed using the ``+=`` operator. To add one new
child node to element, do::
element += node
This is equivalent to ``element.append(node)``.
To add a list of multiple child nodes at once, use the same ``+=``
operator::
element += [node1, node2]
This is equivalent to ``element.extend([node1, node2])``.
"""
basic_attributes = ('ids', 'classes', 'names', 'dupnames')
"""List attributes which are defined for every Element-derived class
instance and can be safely transferred to a different node."""
local_attributes = ('backrefs',)
"""A list of class-specific attributes that should not be copied with the
standard attributes when replacing a node.
NOTE: Derived classes should override this value to prevent any of its
attributes being copied by adding to the value in its parent class."""
list_attributes = basic_attributes + local_attributes
"""List attributes, automatically initialized to empty lists for
all nodes."""
known_attributes = list_attributes + ('source', 'rawsource')
"""List attributes that are known to the Element base class."""
tagname = None
"""The element generic identifier. If None, it is set as an instance
attribute to the name of the class."""
child_text_separator = '\n\n'
"""Separator for child nodes, used by `astext()` method."""
def __init__(self, rawsource='', *children, **attributes):
self.rawsource = rawsource
"""The raw text from which this element was constructed.
NOTE: some elements do not set this value (default '').
"""
self.children = []
"""List of child nodes (elements and/or `Text`)."""
self.extend(children) # maintain parent info
self.attributes = {}
"""Dictionary of attribute {name: value}."""
# Initialize list attributes.
for att in self.list_attributes:
self.attributes[att] = []
for att, value in attributes.items():
att = att.lower()
if att in self.list_attributes:
# mutable list; make a copy for this node
self.attributes[att] = value[:]
else:
self.attributes[att] = value
if self.tagname is None:
self.tagname = self.__class__.__name__
def _dom_node(self, domroot):
element = domroot.createElement(self.tagname)
for attribute, value in self.attlist():
if isinstance(value, list):
value = ' '.join([serial_escape('%s' % (v,)) for v in value])
element.setAttribute(attribute, '%s' % value)
for child in self.children:
element.appendChild(child._dom_node(domroot))
return element
def __repr__(self):
data = ''
for c in self.children:
data += c.shortrepr()
if len(data) > 60:
data = data[:56] + ' ...'
break
if self['names']:
return '<%s "%s": %s>' % (self.__class__.__name__,
'; '.join([ensure_str(n) for n in self['names']]), data)
else:
return '<%s: %s>' % (self.__class__.__name__, data)
def shortrepr(self):
if self['names']:
return '<%s "%s"...>' % (self.__class__.__name__,
'; '.join([ensure_str(n) for n in self['names']]))
else:
return '<%s...>' % self.tagname
def __unicode__(self):
if self.children:
return u'%s%s%s' % (self.starttag(),
''.join([unicode(c) for c in self.children]),
self.endtag())
else:
return self.emptytag()
if sys.version_info > (3,):
# 2to3 doesn't convert __unicode__ to __str__
__str__ = __unicode__
def starttag(self, quoteattr=None):
# the optional arg is used by the docutils_xml writer
if quoteattr is None:
quoteattr = pseudo_quoteattr
parts = [self.tagname]
for name, value in self.attlist():
if value is None: # boolean attribute
parts.append('%s="True"' % name)
continue
if isinstance(value, list):
values = [serial_escape('%s' % (v,)) for v in value]
value = ' '.join(values)
else:
value = unicode(value)
value = quoteattr(value)
parts.append(u'%s=%s' % (name, value))
return u'<%s>' % u' '.join(parts)
def endtag(self):
return '</%s>' % self.tagname
def emptytag(self):
return u'<%s/>' % u' '.join([self.tagname] +
['%s="%s"' % (n, v)
for n, v in self.attlist()])
def __len__(self):
return len(self.children)
def __contains__(self, key):
# support both membership test for children and attributes
# (has_key is translated to "in" by 2to3)
if isinstance(key, basestring):
return key in self.attributes
return key in self.children
def __getitem__(self, key):
if isinstance(key, basestring):
return self.attributes[key]
elif isinstance(key, int):
return self.children[key]
elif isinstance(key, types.SliceType):
assert key.step in (None, 1), 'cannot handle slice with stride'
return self.children[key.start:key.stop]
else:
raise TypeError, ('element index must be an integer, a slice, or '
'an attribute name string')
def __setitem__(self, key, item):
if isinstance(key, basestring):
self.attributes[str(key)] = item
elif isinstance(key, int):
self.setup_child(item)
self.children[key] = item
elif isinstance(key, types.SliceType):
assert key.step in (None, 1), 'cannot handle slice with stride'
for node in item:
self.setup_child(node)
self.children[key.start:key.stop] = item
else:
raise TypeError, ('element index must be an integer, a slice, or '
'an attribute name string')
def __delitem__(self, key):
if isinstance(key, basestring):
del self.attributes[key]
elif isinstance(key, int):
del self.children[key]
elif isinstance(key, types.SliceType):
assert key.step in (None, 1), 'cannot handle slice with stride'
del self.children[key.start:key.stop]
else:
raise TypeError, ('element index must be an integer, a simple '
'slice, or an attribute name string')
def __add__(self, other):
return self.children + other
def __radd__(self, other):
return other + self.children
def __iadd__(self, other):
"""Append a node or a list of nodes to `self.children`."""
if isinstance(other, Node):
self.append(other)
elif other is not None:
self.extend(other)
return self
def astext(self):
return self.child_text_separator.join(
[child.astext() for child in self.children])
def non_default_attributes(self):
atts = {}
for key, value in self.attributes.items():
if self.is_not_default(key):
atts[key] = value
return atts
def attlist(self):
attlist = self.non_default_attributes().items()
attlist.sort()
return attlist
def get(self, key, failobj=None):
return self.attributes.get(key, failobj)
def hasattr(self, attr):
return attr in self.attributes
def delattr(self, attr):
if attr in self.attributes:
del self.attributes[attr]
def setdefault(self, key, failobj=None):
return self.attributes.setdefault(key, failobj)
has_key = hasattr
# support operator ``in``
__contains__ = hasattr
def get_language_code(self, fallback=''):
"""Return node's language tag.
Look iteratively in self and parents for a class argument
starting with ``language-`` and return the remainder of it
(which should be a `BCP49` language tag) or the `fallback`.
"""
for cls in self.get('classes', []):
if cls.startswith('language-'):
return cls[9:]
try:
return self.parent.get_language(fallback)
except AttributeError:
return fallback
def append(self, item):
self.setup_child(item)
self.children.append(item)
def extend(self, item):
for node in item:
self.append(node)
def insert(self, index, item):
if isinstance(item, Node):
self.setup_child(item)
self.children.insert(index, item)
elif item is not None:
self[index:index] = item
def pop(self, i=-1):
return self.children.pop(i)
def remove(self, item):
self.children.remove(item)
def index(self, item):
return self.children.index(item)
def is_not_default(self, key):
if self[key] == [] and key in self.list_attributes:
return 0
else:
return 1
def update_basic_atts(self, dict_):
"""
Update basic attributes ('ids', 'names', 'classes',
'dupnames', but not 'source') from node or dictionary `dict_`.
"""
if isinstance(dict_, Node):
dict_ = dict_.attributes
for att in self.basic_attributes:
self.append_attr_list(att, dict_.get(att, []))
def append_attr_list(self, attr, values):
"""
For each element in values, if it does not exist in self[attr], append
it.
NOTE: Requires self[attr] and values to be sequence type and the
former should specifically be a list.
"""
# List Concatenation
for value in values:
if not value in self[attr]:
self[attr].append(value)
def coerce_append_attr_list(self, attr, value):
"""
First, convert both self[attr] and value to a non-string sequence
type; if either is not already a sequence, convert it to a list of one
element. Then call append_attr_list.
NOTE: self[attr] and value both must not be None.
"""
# List Concatenation
if not isinstance(self.get(attr), list):
self[attr] = [self[attr]]
if not isinstance(value, list):
value = [value]
self.append_attr_list(attr, value)
def replace_attr(self, attr, value, force = True):
"""
If self[attr] does not exist or force is True or omitted, set
self[attr] to value, otherwise do nothing.
"""
# One or the other
if force or self.get(attr) is None:
self[attr] = value
def copy_attr_convert(self, attr, value, replace = True):
"""
If attr is an attribute of self, set self[attr] to
[self[attr], value], otherwise set self[attr] to value.
NOTE: replace is not used by this function and is kept only for
compatibility with the other copy functions.
"""
if self.get(attr) is not value:
self.coerce_append_attr_list(attr, value)
def copy_attr_coerce(self, attr, value, replace):
"""
If attr is an attribute of self and either self[attr] or value is a
list, convert all non-sequence values to a sequence of 1 element and
then concatenate the two sequence, setting the result to self[attr].
If both self[attr] and value are non-sequences and replace is True or
self[attr] is None, replace self[attr] with value. Otherwise, do
nothing.
"""
if self.get(attr) is not value:
if isinstance(self.get(attr), list) or \
isinstance(value, list):
self.coerce_append_attr_list(attr, value)
else:
self.replace_attr(attr, value, replace)
def copy_attr_concatenate(self, attr, value, replace):
"""
If attr is an attribute of self and both self[attr] and value are
lists, concatenate the two sequences, setting the result to
self[attr]. If either self[attr] or value are non-sequences and
replace is True or self[attr] is None, replace self[attr] with value.
Otherwise, do nothing.
"""
if self.get(attr) is not value:
if isinstance(self.get(attr), list) and \
isinstance(value, list):
self.append_attr_list(attr, value)
else:
self.replace_attr(attr, value, replace)
def copy_attr_consistent(self, attr, value, replace):
"""
If replace is True or self[attr] is None, replace self[attr] with
value. Otherwise, do nothing.
"""
if self.get(attr) is not value:
self.replace_attr(attr, value, replace)
def update_all_atts(self, dict_, update_fun = copy_attr_consistent,
replace = True, and_source = False):
"""
Updates all attributes from node or dictionary `dict_`.
Appends the basic attributes ('ids', 'names', 'classes',
'dupnames', but not 'source') and then, for all other attributes in
dict_, updates the same attribute in self. When attributes with the
same identifier appear in both self and dict_, the two values are
merged based on the value of update_fun. Generally, when replace is
True, the values in self are replaced or merged with the values in
dict_; otherwise, the values in self may be preserved or merged. When
and_source is True, the 'source' attribute is included in the copy.
NOTE: When replace is False, and self contains a 'source' attribute,
'source' is not replaced even when dict_ has a 'source'
attribute, though it may still be merged into a list depending
on the value of update_fun.
NOTE: It is easier to call the update-specific methods then to pass
the update_fun method to this function.
"""
if isinstance(dict_, Node):
dict_ = dict_.attributes
# Include the source attribute when copying?
if and_source:
filter_fun = self.is_not_list_attribute
else:
filter_fun = self.is_not_known_attribute
# Copy the basic attributes
self.update_basic_atts(dict_)
# Grab other attributes in dict_ not in self except the
# (All basic attributes should be copied already)
for att in filter(filter_fun, dict_):
update_fun(self, att, dict_[att], replace)
def update_all_atts_consistantly(self, dict_, replace = True,
and_source = False):
"""
Updates all attributes from node or dictionary `dict_`.
Appends the basic attributes ('ids', 'names', 'classes',
'dupnames', but not 'source') and then, for all other attributes in
dict_, updates the same attribute in self. When attributes with the
same identifier appear in both self and dict_ and replace is True, the
values in self are replaced with the values in dict_; otherwise, the
values in self are preserved. When and_source is True, the 'source'
attribute is included in the copy.
NOTE: When replace is False, and self contains a 'source' attribute,
'source' is not replaced even when dict_ has a 'source'
attribute, though it may still be merged into a list depending
on the value of update_fun.
"""
self.update_all_atts(dict_, Element.copy_attr_consistent, replace,
and_source)
def update_all_atts_concatenating(self, dict_, replace = True,
and_source = False):
"""
Updates all attributes from node or dictionary `dict_`.
Appends the basic attributes ('ids', 'names', 'classes',
'dupnames', but not 'source') and then, for all other attributes in
dict_, updates the same attribute in self. When attributes with the
same identifier appear in both self and dict_ whose values aren't each
lists and replace is True, the values in self are replaced with the
values in dict_; if the values from self and dict_ for the given
identifier are both of list type, then the two lists are concatenated
and the result stored in self; otherwise, the values in self are
preserved. When and_source is True, the 'source' attribute is
included in the copy.
NOTE: When replace is False, and self contains a 'source' attribute,
'source' is not replaced even when dict_ has a 'source'
attribute, though it may still be merged into a list depending
on the value of update_fun.
"""
self.update_all_atts(dict_, Element.copy_attr_concatenate, replace,
and_source)
def update_all_atts_coercion(self, dict_, replace = True,
and_source = False):
"""
Updates all attributes from node or dictionary `dict_`.
Appends the basic attributes ('ids', 'names', 'classes',
'dupnames', but not 'source') and then, for all other attributes in
dict_, updates the same attribute in self. When attributes with the
same identifier appear in both self and dict_ whose values are both
not lists and replace is True, the values in self are replaced with
the values in dict_; if either of the values from self and dict_ for
the given identifier are of list type, then first any non-lists are
converted to 1-element lists and then the two lists are concatenated
and the result stored in self; otherwise, the values in self are
preserved. When and_source is True, the 'source' attribute is
included in the copy.
NOTE: When replace is False, and self contains a 'source' attribute,
'source' is not replaced even when dict_ has a 'source'
attribute, though it may still be merged into a list depending
on the value of update_fun.
"""
self.update_all_atts(dict_, Element.copy_attr_coerce, replace,
and_source)
def update_all_atts_convert(self, dict_, and_source = False):
"""
Updates all attributes from node or dictionary `dict_`.
Appends the basic attributes ('ids', 'names', 'classes',
'dupnames', but not 'source') and then, for all other attributes in
dict_, updates the same attribute in self. When attributes with the
same identifier appear in both self and dict_ then first any non-lists
are converted to 1-element lists and then the two lists are
concatenated and the result stored in self; otherwise, the values in
self are preserved. When and_source is True, the 'source' attribute
is included in the copy.
NOTE: When replace is False, and self contains a 'source' attribute,
'source' is not replaced even when dict_ has a 'source'
attribute, though it may still be merged into a list depending
on the value of update_fun.
"""
self.update_all_atts(dict_, Element.copy_attr_convert,
and_source = and_source)
def clear(self):
self.children = []
def replace(self, old, new):
"""Replace one child `Node` with another child or children."""
index = self.index(old)
if isinstance(new, Node):
self.setup_child(new)
self[index] = new
elif new is not None:
self[index:index+1] = new
def replace_self(self, new):
"""
Replace `self` node with `new`, where `new` is a node or a
list of nodes.
"""
update = new
if not isinstance(new, Node):
# `new` is a list; update first child.
try:
update = new[0]
except IndexError:
update = None
if isinstance(update, Element):
update.update_basic_atts(self)
else:
# `update` is a Text node or `new` is an empty list.
# Assert that we aren't losing any attributes.
for att in self.basic_attributes:
assert not self[att], \
'Losing "%s" attribute: %s' % (att, self[att])
self.parent.replace(self, new)
def first_child_matching_class(self, childclass, start=0, end=sys.maxint):
"""
Return the index of the first child whose class exactly matches.
Parameters:
- `childclass`: A `Node` subclass to search for, or a tuple of `Node`
classes. If a tuple, any of the classes may match.
- `start`: Initial index to check.
- `end`: Initial index to *not* check.
"""
if not isinstance(childclass, tuple):
childclass = (childclass,)
for index in range(start, min(len(self), end)):
for c in childclass:
if isinstance(self[index], c):
return index
return None
def first_child_not_matching_class(self, childclass, start=0,
end=sys.maxint):
"""
Return the index of the first child whose class does *not* match.
Parameters:
- `childclass`: A `Node` subclass to skip, or a tuple of `Node`
classes. If a tuple, none of the classes may match.
- `start`: Initial index to check.
- `end`: Initial index to *not* check.
"""
if not isinstance(childclass, tuple):
childclass = (childclass,)
for index in range(start, min(len(self), end)):
for c in childclass:
if isinstance(self.children[index], c):
break
else: