-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathpagenode.py
50 lines (44 loc) · 1.23 KB
/
pagenode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from BeautifulSoup import *
import cairo
import boxmodel
# Parse-node classes that PageNode marks as 'ignored'; makeBox() yields no
# box for such nodes.
TYPES_TO_IGNORE = (Declaration, Comment)
# Tag names whose contents PageNode does not descend into (their children
# list is left as None).
TAGS_TO_IGNORE = ("script", "style")
class PageNode:
    """A basic, inline page element built from a BeautifulSoup parse node.

    Block elements will come later.

    Attributes set by the constructor:
        parent     -- whatever the caller passed as the enclosing node
        type       -- one of 'ignored', 'tag', 'text' or 'unknown'
        text       -- str(soup), set for every node
        attributes -- dict of the tag's attributes (tags only)
        tagtype    -- the tag's name (tags only)
        children   -- list of child PageNodes; stays None for non-tag nodes
                      and for tags listed in TAGS_TO_IGNORE
    """

    # Class-level default so that nodes without children can still be tested
    # with a plain truthiness check.
    children = None

    def __init__(self, soup, parent):
        """Classify the parse node `soup` and recursively wrap its contents.

        soup   -- a BeautifulSoup parse-tree node
        parent -- the PageNode that owns this one (stored as given)
        """
        self.parent = parent
        # The ignore check must come before the NavigableString check:
        # BeautifulSoup's Comment and Declaration are string subclasses.
        if isinstance(soup, TYPES_TO_IGNORE):
            self.type = 'ignored'
        elif isinstance(soup, Tag):
            self.type = 'tag'
            self.attributes = dict(soup.attrs)
            if soup.name not in TAGS_TO_IGNORE:
                self.children = [PageNode(node, self)
                                 for node in soup.contents]
            self.tagtype = soup.name
        elif isinstance(soup, NavigableString):
            self.type = 'text'
        else:
            self.type = 'unknown'
        self.text = str(soup)

    def makeBox(self):
        """Build and return the layout box for this node.

        Text nodes yield a boxmodel.TextBox and ignored nodes yield None.
        Every other node yields a boxmodel.LineBox holding the boxes of its
        children; that LineBox is also stored on self.box for makeImage().
        """
        # Compare by value: identity comparison on strings only happens to
        # work when the interpreter interns both operands.
        if self.type == 'text':
            return boxmodel.TextBox(self.text)
        if self.type == 'ignored':
            return None
        box = boxmodel.LineBox()
        for child in self.children or ():
            subbox = child.makeBox()
            # Ignored children produce no box; do not hand None to the
            # line box.
            if subbox is not None:
                box.addSubBox(subbox)
        self.box = box
        return box

    def makeImage(self, width):
        """Draw this node's box onto a new cairo ARGB32 surface and return it.

        makeBox() must have been called on this node first, since that is
        where self.box gets assigned.

        width -- surface width; also given to the box via setWidth() before
                 its height is queried
        """
        self.box.setWidth(width)
        height = self.box.getHeight()
        # The height is truncated to an int for the surface size.
        img = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, int(height))
        ct = cairo.Context(img)
        self.box.draw(ct, 0, 0, width)
        return img